# Load Necessary Packages ------------------------------------------------------

#install.packages("pacman")
library(pacman)

# tidyverse
p_load(dplyr, tidyr, purrr, ggplot2, ggrepel, patchwork, stringr, readr, forcats,
       ggalluvial, ggridges)

# misc
p_load(here, foreign, readstata13, srvyr, WDI, jrvFinance, modelsummary, gt,
       fixest, margins, sandwich, estimatr, magick, sjPlot)

# custom function, used to create "NA" data entries using the ESMAP 888 code or -ve values
naconvert <- function(x) {
  return(ifelse(x < 0 | x == 888 | x == "888", NA, x))
}

# custom function, used to convert "NA" data entries to zero, for conditional questions
natozero <- function(x) {
  return(ifelse(is.na(x), 0, x))
}

# custom function for weighted ecdf plots
source(here("R", "stat_ecdf_weighted.R"))

options(scipen = 999)

# RWANDA -----------------------------------------------------------------------

rwanda_main <- read.dta13(here("Data", "rwanda", "Main dataset.dta"), 
                          convert.factors = F)

rwanda_hh <- read.dta13(here("Data", "rwanda", "Section A.dta"), 
                        convert.factors = F)

rwanda_solar <- read.dta13(here("Data", "rwanda", "Section C.dta"), 
                           convert.factors = F)

rwanda_apps <- read.dta13(here("Data", "rwanda", "Section L.dta"),
                          nonint.factors = T)

rwanda_fuel <- read.dta13(here::here("Data", "rwanda", "Section H.dta"), 
                          convert.factors = F)

rwanda_stove <- read.dta13(here::here("Data", "rwanda", "Section I.dta"), 
                           convert.factors = F)

gen <- rwanda_main %>% 
  transmute(HHID = HHID,
            rural = as.numeric(Locality == 0),
            admin1 = Province,
            admin2 = District,
            strata = paste0(Province, "_", rural), # sample is stratified by region and rural/urban
            psu = cluster,
            weight = sample_weight)

hh <- rwanda_hh %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(as.numeric(A3 == 2 & A4 == 1), na.rm = T),
    educ10th = max(as.numeric(A9 %in% c(2:7)), na.rm = T),
    nonfarmemp = max(as.numeric(A12 %in% c(1,3,4)))) %>% 
  mutate(hh_exp_annual = NA) # No expenditures data gathered in Rwanda

# Calculate electricity availability, consumption and expenditures
avail <- rwanda_main %>% 
  transmute(HHID = HHID,
            grid = as.numeric(C2 == 1),
            mg = as.numeric(C42 == 1),
            solar = as.numeric(C143 == 1),
            batt = as.numeric(C113 == 1),
            dg = as.numeric(C84 == 1),
            availability_grid = C26B,
            availability_eve_grid = C27B,
            availability_mg = C68B,
            availability_eve_mg = C69B,
            availability_solar = C172B,
            availability_eve_solar = C173B,
            availability_dg = C107B,
            availability_eve_dg = C108B,
            availability_batt = C127) %>% 
  mutate_all(~naconvert(.))

avail <- avail %>% 
  rowwise() %>% 
  transmute(HHID = HHID,
            availability = max(availability_grid, availability_mg, 
                               availability_solar, availability_batt, availability_dg,
                               na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases where max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_mg, 
                                   availability_eve_solar, availability_batt,
                                   availability_eve_dg,
                               na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve),
            grid = grid, mg = mg, solar = solar, batt = batt, dg = dg)

elexp <- rwanda_main %>% 
  transmute(
    HHID = HHID,
    exp_grid = C21,
    exp_mg = C63,
    exp_batt = C124) %>% 
  mutate(across(exp_grid:exp_batt, ~naconvert(.)))

dgexp <- rwanda_main %>% 
  select(
    HHID,
    dg_om = C99,
    dg_fuel = C104) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- rwanda_solar %>% 
  select(
    HHID,
    solar_capex = C160,
    solar_payg = C162) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
           solar_payg,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>%
  left_join(dgexp) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12)  
  
elcons <- rwanda_main %>% 
  mutate(across(c(C22,C64), ~natozero(.))
  ) %>% 
  transmute(
    HHID = HHID,
    kwh_consumption = select(.,c(C22,C64)) %>% rowSums(.,na.rm = T))

# Calculate AF services tier
apps <- rwanda_apps %>% 
  select(HHID, Item, La) %>% 
  spread(Item, La)

names(apps) <-make.names(names(apps),unique = TRUE)

apps <- apps %>% 
  transmute(
    HHID = HHID,
    srv_minimum = case_when(
      rowSums(select(., Incandescent.Light.Bulb, Fluorescent.Tube, 
                     Compact.Fluorescent.Light..CFL..Bulb, LED.Light.Bulb)) > 0 &
      rowSums(select(., Smartphone..internet.phone..charger,
                     Regular.mobile.phone.charger)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., Black...White.TV:Flat.color.TV, 
                     Fan, Air.cooler..External.Unit.:Space.Heater,
                     Refrigerator)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., Freezer, Washing.machine,
                     Electric.stove.range, Microwave.oven)) > 0 ~ 1,
      TRUE ~ 0)
    ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  )

# Extract cooking fuel expenditures
fuel <- rwanda_fuel %>% 
  filter(H5 == 1) %>% 
  mutate(H13 = naconvert(H13)) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(H13, na.rm = T)) %>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  full_join(hh %>% select(HHID))

# Extract stated primary stove and classify whether clean
stove <- rwanda_stove %>%
  transmute(HHID = HHID,
            stoveid = I2,
            primarystoveclean_stated = ifelse(I18A %in% c(6,14,15,16,17), "clean", "solids"))

stove <- rwanda_main %>% 
  select(HHID, stoveid = I35) %>% 
  left_join(stove, by = c("HHID", "stoveid")) %>% 
  select(-stoveid) %>% 
  mutate(primarystoveclean_stated = as.numeric(primarystoveclean_stated == "clean" &
                                                 !is.na(primarystoveclean_stated)))

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- rwanda_stove %>% 
  mutate(across(I24:I26, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(I18A %in% c(6,14,15,16,17),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(I24:I26), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = I24+I25+I26) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- rwanda_fuel %>% 
  filter(H2 %in% c(1,5,11), H5 == 1) %>% 
  mutate(across(H9_1:H9_12, ~naconvert(.))
  ) %>% 
  transmute(
    HHID = HHID,
    across(H9_1:H9_12, ~as.numeric(.>0)),
    H9_13 = natozero(H9_13)
  ) %>% 
  mutate(
    mthsfuelused = select(., H9_1:H9_12) %>% rowSums(na.rm = TRUE),
    useallyear = as.numeric(H9_13 == 111),
    mthsfuelused = ifelse(useallyear == 1, 12, mthsfuelused)
  ) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(mthsfuelused)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths)) %>% 
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- rwanda_stove %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(I18A == 17)) %>% 
  mutate(elcook = natozero(elcook))
    
rwanda <- left_join(hh, gen, by = c("HHID"))
rwanda <- left_join(rwanda, avail, by = c("HHID"))
rwanda <- left_join(rwanda, elcons, by = c("HHID"))
rwanda <- left_join(rwanda, elexp, by = c("HHID"))
rwanda <- left_join(rwanda, fuel, by = c("HHID"))
rwanda <- left_join(rwanda, fuelmths, by = c("HHID"))
rwanda <- left_join(rwanda, stove, by = c("HHID"))
rwanda <- left_join(rwanda, stove_af, by = c("HHID"))
rwanda <- left_join(rwanda, elcook, by = c("HHID"))
rwanda <- left_join(rwanda, apps, by = c("HHID"))

rwanda$Country <- "Rwanda"

rwanda <- rwanda %>% 
  select(Country, HHID, rural, admin1, admin2, strata, psu, weight, femalehead, educ10th, 
         nonfarmemp, hh_exp_annual, grid, mg, solar, dg, batt, availability, availability_eve,
         kwh_consumption, srv_none, srv_minimum, srv_decent, srv_affluent,
         primarystoveclean_stated, stoveuseminutes_clean, stoveuseminutes_solids, 
         cleanfuelmths, elexp_total, cookexp_total,everything()
         )

# ETHIOPIA ---------------------------------------------------------------------

ethiopia_gen <- read.dta13(here("Data", "ethiopia", "Identification.dta"), 
                             convert.factors = F)

ethiopia_avail <- read.dta13(here("Data", "ethiopia", "Section C.dta"), 
                           convert.factors = F)

ethiopia_solar <- read.dta13(here("Data", "ethiopia", "Section C_C144_165.dta"), 
                                convert.factors = F)

ethiopia_solar2 <- read.dta13(here("Data", "ethiopia", "Section C_C125_143.dta"), 
                             convert.factors = F)

ethiopia_hh <- read.dta13(here("Data", "ethiopia", "Section A_HHRoster.dta"), 
                          convert.factors = F)

ethiopia_apps <- read.dta13(here("Data", "ethiopia", "Section M_M17_M41.dta"),
                          generate.factors = T, nonint.factors = T)

ethiopia_fuel <- read.dta13(here::here("Data", "ethiopia", "Section H.dta"), 
                            convert.factors = F)
ethiopia_fuel2 <- read.dta13(here::here("Data", "ethiopia", "Section H_H10.dta"), 
                             convert.factors = F)

ethiopia_stove <- read.dta13(here::here("Data","ethiopia", "Section I.dta"), 
                             convert.factors = F)
ethiopia_stove2 <- read.dta13(here::here("Data","ethiopia", "Section I_I13.dta"), 
                              convert.factors = F)
ethiopia_stove3 <- read.dta13(here::here("Data","ethiopia", "Section I_main_cookstove.dta"), 
                              convert.factors = F)

ethiopia_exp1 <- read.dta13(here::here("Data","ethiopia", "Section L_consumption.dta"), 
                            convert.factors = F)
ethiopia_exp2 <- read.dta13(here::here("Data","ethiopia", "Section L_expenditure.dta"), 
                            convert.factors = F)
ethiopia_exp3 <- read.dta13(here::here("Data","ethiopia", "Section L_goods&services.dta"), 
                            convert.factors = F)

ethiopia_weights <- read.dta13(here("Data", "ethiopia", "weights_ethiopia.dta"))

gen <- ethiopia_gen %>%
  transmute(HHID = as.character(HHID),
            admin1 = HI_1,
            admin2 = paste0(HI_2, "_", HI_3),
            rural = as.numeric(HI_6 == 2),
            strata = paste0(HI_1,"_",rural), # sample is stratified by region and rural/urban
            psu = HI_5)

weights <- ethiopia_weights %>% 
  transmute(HHID = as.character(HHID),
            weight = as.numeric(hh_weights))

gen <- left_join(gen, weights, by = "HHID")

hh <- ethiopia_hh %>%
  group_by(HHID) %>% 
  summarise(
    femalehead = max(as.numeric(A3 == 2 & A4 == 1), na.rm = T),
    educ10th = max(as.numeric(A9 %in% c(11:17)), na.rm = T),
    nonfarmemp = max(as.numeric(A12 %in% c(1,3,4))))

hh_exp1 <- ethiopia_exp1 %>% 
  mutate(L1 = naconvert(L1)) %>%
  group_by(HHID) %>% 
  summarise(hh_exp = sum(L1, na.rm = T)*52) # Weekly consumption to annual

hh_exp2 <- ethiopia_exp2 %>% 
  mutate(L2 = naconvert(L2)) %>%
  group_by(HHID) %>% 
  summarise(hh_exp = sum(L2, na.rm = T)*12) # Monthly G&S expenditure to annual

hh_exp3 <- ethiopia_exp3 %>% 
  mutate(L3 = naconvert(L3)) %>%
  group_by(HHID) %>% 
  summarise(hh_exp = sum(L3, na.rm = T)) # Annual G&S expenditure

hh_exp <- rbind(hh_exp1, hh_exp2) %>% 
  rbind(hh_exp3) %>% 
  group_by(HHID) %>% 
  summarise(hh_exp_annual = sum(hh_exp, na.rm = T))

hh <- left_join(hh, hh_exp, by = "HHID")

# Calculate electricity availability, consumption and expenditures
avail <- ethiopia_avail %>%
  transmute(HHID = HHID,
            availability_grid = C25_tm,
            availability_eve_grid = C26_tm,
            availability_mg = C62_tm,
            availability_eve_mg = C63_tm,
            availability_dg = C97_tm,
            availability_eve_dg = C98_tm,
            availability_batt = C117,
            grid = as.numeric(C2 == 1),
            mg = natozero(as.numeric(C38 == 1)),
            solar = natozero(as.numeric(C121 != 4)),
            dg = natozero(as.numeric(C75 == 1)),
            batt = natozero(as.numeric(C103 == 1))
  ) %>%
  mutate_all(~naconvert(.))

availsolar <- ethiopia_solar %>%
  transmute(HHID = HHID,
            availability_solar = C146_tm,
            availability_eve_solar = C147_tm)

avail <- left_join(avail, availsolar, by = c("HHID"))

avail <- avail %>% 
  rowwise() %>% 
  transmute(HHID = HHID,
            availability = max(availability_grid, availability_mg, availability_solar,
                               availability_batt, availability_dg,
                               na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases were max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_mg, availability_eve_solar,
                               availability_batt, availability_eve_dg, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve),
            grid = grid, mg = mg, solar = solar, batt = batt, dg = dg)

elexp <- ethiopia_avail %>% 
  transmute(
    HHID = HHID,
    exp_grid = C20,
    exp_mg = C57,
    exp_batt = C114) %>% 
  mutate(across(exp_grid:exp_batt, ~naconvert(.)))

dgexp <- ethiopia_avail %>% 
  select(
    HHID,
    dg_rent = C88,
    dg_om = C90,
    dg_fuel = C94) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel + dg_rent) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- ethiopia_solar2 %>% 
  select(
    HHID,
    solar_capex = C137,
    solar_payg = C139) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
           solar_payg,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>% 
  left_join(dgexp) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12) 
  
elcons <- ethiopia_avail %>% 
  mutate(across(c(C21,C58), ~natozero(.))
  ) %>% 
  transmute(
    HHID = HHID,
    kwh_consumption = select(.,c(C21,C58)) %>% rowSums(.,na.rm = T))

# Calculate AF services tier
apps <-ethiopia_apps %>%
  select(HHID, M_item, M_a) %>%
  spread(M_item, M_a)

names(apps)<-make.names(names(apps),unique = TRUE)

apps <- apps %>% 
  transmute(
    HHID = HHID,
    srv_minimum = case_when(
      rowSums(select(., incandescent.light.bulb:led.light.bulb)) > 0 &
        rowSums(select(., smartphone..internet.phone..charger,
                       regular.mobile.phone.charger)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., black...white.tv:flat.color.tv, fan, 
                     air.cooler..external.unit., space.heater,
                     refrigerator)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., washing.machine,
                     microwave.oven)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  ) %>% 
  full_join(gen %>% select(HHID))

# Extract fuel costs
fuel <- ethiopia_fuel %>%
  filter(H5 == 1, H2 != 14) %>%  # Filter electricity for cooking as captured already
  mutate(H15 = naconvert(H15)) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(H15, na.rm = T)) %>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  full_join(gen %>% select(HHID))

# Extract stated primary stove and classify whether clean
stove <- ethiopia_stove %>% 
  mutate(primarystovetype = as.numeric(I20_a %in% c(1,7,14,16))
  ) %>% 
  group_by(HHID) %>% 
  summarise(primarystoveclean_stated = max(primarystovetype)) %>% 
  full_join(gen %>% select(HHID))

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- ethiopia_stove %>% 
  mutate(across(I25:I27, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(I20_a %in% c(1,7,14,16),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(I25:I27), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = I25+I26+I27) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  full_join(gen %>% select(HHID)) %>% 
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- ethiopia_fuel %>%
  filter(H5 == 1)

fuel2 <- pivot_wider(ethiopia_fuel2 %>% mutate(value = 1,
                                               H10 = paste0("month_",H10)),
                              names_from = H10, values_from = value) %>%
  select(HHID, H2, str_sort(tidyselect::peek_vars(), numeric = T))

fuelmths <- left_join(fuelmths, fuel2, by = c("HHID", "H2"))

fuelmths <- fuelmths %>% 
  filter(H2 %in% c(1,7,14,16)) %>% 
  mutate(across(month_1:month_12, ~naconvert(.))
  ) %>% 
  transmute(
    HHID = HHID,
    across(month_1:month_12, ~as.numeric(.>0)),
    month_13 = ifelse(H2 == 7, 1, month_13), # availability for electricity is not captured, we assume 12 months
    month_13 = natozero(month_13)
  ) %>% 
  mutate(
    mthsfuelused = select(., month_1:month_12) %>% rowSums(na.rm = TRUE),
    useallyear = as.numeric(month_13 == 1),
    mthsfuelused = ifelse(useallyear == 1, 12, mthsfuelused)
  ) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(mthsfuelused, na.rm = T)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths)) %>% 
  full_join(gen %>% select(HHID))

# Extract if electricity used for cooking
elcook <- ethiopia_stove %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(I20_a == 14)) %>% 
  mutate(elcook = ifelse(elcook > 0 & !is.na(elcook), 1,0)) %>% 
  full_join(gen %>% select(HHID))

ethiopia <- left_join(hh, gen, by = c("HHID"))
ethiopia <- left_join(ethiopia, avail, by = c("HHID"))
ethiopia <- left_join(ethiopia, elcons, by = c("HHID"))
ethiopia <- left_join(ethiopia, elexp, by = c("HHID"))
ethiopia <- left_join(ethiopia, fuel, by = c("HHID"))
ethiopia <- left_join(ethiopia, fuelmths, by = c("HHID"))
ethiopia <- left_join(ethiopia, stove, by = c("HHID"))
ethiopia <- left_join(ethiopia, stove_af, by = c("HHID"))
ethiopia <- left_join(ethiopia, elcook, by = c("HHID"))
ethiopia <- left_join(ethiopia, apps, by = c("HHID"))

ethiopia <- ethiopia %>% 
  mutate(Country = "Ethiopia"
  ) %>% 
  select(names(rwanda))
  
# CAMBODIA ---------------------------------------------------------------------

cambodia_main <- read.dta13(here("Data", "cambodia", "Main Dataset.dta"), 
                          convert.factors = F)

cambodia_solar <- read.dta13(here("Data", "cambodia", "Solar Roster.dta"), 
                            convert.factors = F)

cambodia_hh <- read.dta13(here("Data", "cambodia", "HHRoster.dta"), 
                        convert.factors = F)

cambodia_fuel <- read.dta13(here("Data", "cambodia", "Roster H.dta"), 
                          convert.factors = F)

cambodia_stove <- read.dta13(here("Data", "cambodia", "Roster I.dta"), 
                            convert.factors = F)

gen <- cambodia_main %>% 
  filter(!is.na(pw)) %>% 
  transmute(HHID = as.character(Household_ID),
            admin1 = Province,
            admin2 = District,
            strata = paste0(Province,"_", Locality), # assumes stratification at province by rural/urban
            psu = Village,
            rural = as.numeric(Locality == 2),
            weight = pw)

hh <- cambodia_hh %>% 
  filter(!is.na(pw)) %>% 
  mutate(HHID = as.character(Household_ID)) %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(as.numeric(HHgen == 2 & head == 1), na.rm = T),
    educ10th = max(as.numeric(A9 %in% c(10:15)), na.rm = T),
    nonfarmemp = max(as.numeric(A14 %in% c(1,3,4))))

hh_exp <- cambodia_main %>% 
  mutate(HHID = as.character(Household_ID)) %>% 
  mutate(across(c(L1:L33), ~naconvert(.))
  ) %>% 
  select(HHID, L1:L33) %>% 
  summarise(
    HHID = HHID,
    hh_exp1 = select(.,L1:L11) %>% rowSums(., na.rm = T), 
    hh_exp2 = select(.,L12:L19) %>% rowSums(., na.rm = T), 
    hh_exp3 = select(.,L20:L33) %>% rowSums(., na.rm = T) # Annual G&S expenditure
    ) %>%
    mutate(hh_exp1 = hh_exp1*52, # Weekly consumption to annual
           hh_exp2 = hh_exp2*12 # Monthly G&S expenditure to annual
           ) %>% 
  transmute(HHID = HHID,
            hh_exp_annual = select(.,hh_exp1:hh_exp3) %>% rowSums(.,na.rm = T))

hh <- left_join(hh, hh_exp, by = "HHID")

# Calculate electricity availability, consumption and expenditures
avail <- cambodia_main %>% 
  transmute(HHID = as.character(Household_ID),
            availability_grid = C25b,
            availability_eve_grid = C26b,
            availability_batt = C118,
            availability_dg = C98b,
            availability_eve_dg = C99b,
            grid = as.numeric(C2 == 1),
            mg = 0,
            batt = natozero(C105a),
            dg = natozero(naconvert(C75)),
            solar = as.numeric(C122 == 1),) %>% 
  filter(HHID %in% hh$HHID) %>% 
  mutate_all(~naconvert(.))

availsolar <- cambodia_solar %>% 
  transmute(HHID = as.character(Household_ID),
            availability_solar = naconvert(C148b),
            availability_eve_solar = naconvert(C149b)) %>% 
  group_by(HHID) %>% 
  filter(availability_eve_solar == max(availability_eve_solar)) %>%
  distinct(HHID, .keep_all = T) %>% 
  filter(HHID %in% hh$HHID)

avail <- left_join(avail, availsolar, by = c("HHID"))

avail <- avail %>% 
  mutate(across(matches("availability"), ~naconvert(.))) %>% 
  rowwise() %>% 
  transmute(HHID = HHID,
            grid = grid,
            mg = mg,
            solar = solar,
            dg = dg,
            batt = batt,
            availability = max(availability_grid, availability_solar, 
                               availability_batt, availability_dg, na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases were max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_solar, 
                                   availability_batt, availability_eve_dg, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve))

elexp <- cambodia_main %>% 
  transmute(
    HHID = as.character(Household_ID),
    exp_grid = C20,
    exp_mg = NA,
    exp_batt = C115) %>% 
  mutate(across(exp_grid:exp_batt, ~naconvert(.)))

dgexp <- cambodia_main %>% 
  transmute(
    HHID = as.character(Household_ID),
    dg_rent = C88,
    dg_om = C90,
    dg_fuel = C94) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel + dg_rent) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- cambodia_solar %>% 
  transmute(
    HHID = as.character(Household_ID),
    solar_capex = C139c,
    solar_payg = C141) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
           solar_payg,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>%
  left_join(dgexp) %>% 
  filter(HHID %in% hh$HHID) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12)  
  
elcons <- cambodia_main %>% 
  transmute(
    HHID = as.character(Household_ID),
    kwh_consumption = C21
    ) %>%
  mutate_all(~naconvert(.)) %>% 
  filter(HHID %in% hh$HHID)

# Calculate AF services tier
apps <- cambodia_main %>% 
  transmute(
    HHID = as.character(Household_ID),
    srv_minimum = case_when(
      rowSums(select(., M14:M17)) > 0 &
        rowSums(select(., M31, M32)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., M33a, M34a, M35a, M20a, M26, M21)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., M24, M22)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  ) %>% 
  filter(HHID %in% hh$HHID)

# Extract fuel costs
fuel <- cambodia_fuel %>%
  filter(H4_2 == 1, H3 != 10) %>%  # Filter electricity for cooking as captured already
  mutate(HHID = as.character(Household_ID),
         H14 = naconvert(H14)) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(H14, na.rm = T)) %>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract stated primary stove and classify whether clean
stove <- cambodia_main %>% 
  transmute(
    HHID = as.character(Household_ID),
    primarystoveclean_stated = as.numeric(modern == 1)
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- cambodia_stove %>% 
  mutate(HHID = as.character(Household_ID),
         across(I23:I25, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(I18a %in% c(1,5,10,12),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(I23:I25), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = I23+I24+I25) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>%
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- cambodia_fuel %>% 
  filter(H3 %in% c(1,10,12), H4_2 == 1) %>% 
  mutate(across(H10_1:H10_12, ~naconvert(.))
  ) %>% 
  transmute(
    HHID = as.character(Household_ID),
    across(H10_1:H10_12, ~as.numeric(.>0)),
    H10a = natozero(H10a)
  ) %>% 
  mutate(
    mthsfuelused = select(., H10_1:H10_12) %>% rowSums(na.rm = TRUE),
    useallyear = as.numeric(H10a == 1),
    mthsfuelused = ifelse(useallyear == 1, 12, mthsfuelused)
  ) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(mthsfuelused)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths)) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- cambodia_stove %>% 
  mutate(HHID = as.character(Household_ID)
  ) %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(I18a == 10, na.rm = T)) %>% 
  mutate(elcook = ifelse(elcook > 0 & !is.na(elcook), 1,0)) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

cambodia <- left_join(hh, gen, by = c("HHID"))
cambodia <- left_join(cambodia, avail, by = c("HHID"))
cambodia <- left_join(cambodia, elcons, by = c("HHID"))
cambodia <- left_join(cambodia, elexp, by = c("HHID"))
cambodia <- left_join(cambodia, fuel, by = c("HHID"))
cambodia <- left_join(cambodia, fuelmths, by = c("HHID"))
cambodia <- left_join(cambodia, stove, by = c("HHID"))
cambodia <- left_join(cambodia, stove_af, by = c("HHID"))
cambodia <- left_join(cambodia, elcook, by = c("HHID"))
cambodia <- left_join(cambodia, apps, by = c("HHID"))

cambodia <- cambodia %>% 
  mutate(Country = "Cambodia") %>% 
  select(names(rwanda))

# MYANMAR ----------------------------------------------------------------------

myanmar_main <- read.dta13(here("Data", "myanmar", "Myanmar_01_main.dta"), 
                           convert.factors = F)

myanmar_solar <- read.dta13(here("Data", "myanmar", "Myanmar_05_solar.dta"), 
                           convert.factors = F)

myanmar_hh <- read.dta13(here("Data", "myanmar", "Myanmar_02_individual.dta"), 
                         convert.factors = F)

myanmar_apps <- read.dta13(here("Data", "myanmar", "Myanmar_03_appliance.dta"),
                           nonint.factors = T)

myanmar_weights <- read.dta13(here("Data", "myanmar", "weights_myanmar.dta"), 
                              convert.factors = F)

myanmar_stove <- read.dta13(here("Data", "myanmar", "Myanmar_09_Cookstove_H1A.dta"), 
                            convert.factors = F)

myanmar_stove2 <- read.dta13(here("Data", "myanmar", "Myanmar_10_Cookstove_detailed.dta"), 
                             convert.factors = F)

hh <- myanmar_hh %>%
  group_by(HHID) %>% 
  summarise(
    femalehead = max(as.numeric(A02_02 == 2 & A02_03 == 1), na.rm = T),
    educ10th = max(as.numeric(A03_04 %in% c(10:14, 20, 30, 40, 50)), na.rm = T),
    nonfarmemp = max(as.numeric(A04_03 %in% c(1, 3, 4))))

weights <- myanmar_weights %>% 
  transmute(HHID = HHID,
            weight = as.numeric(whhd))

hh <- left_join(hh, weights, by = "HHID") %>% 
  filter(!is.na(weight))

hh_exp <- myanmar_main %>% 
  filter(HHID %in% hh$HHID) %>% 
  mutate(across(c(I01_01:I03_14), ~natozero(.))
  ) %>% 
  select(HHID, I01_01:I03_14) %>% 
  summarise(
    HHID = HHID,
    hh_exp1 = select(.,I01_01:I01_11) %>% rowSums(., na.rm = T), 
    hh_exp2 = select(.,I02_01:I02_08) %>% rowSums(., na.rm = T), 
    hh_exp3 = select(.,I03_01:I03_14) %>% rowSums(., na.rm = T) # Annual G&S expenditure
  ) %>%
  mutate(hh_exp1 = hh_exp1*52, # Weekly consumption to annual
         hh_exp2 = hh_exp2*12 # Monthly G&S expenditure to annual
  ) %>% 
  transmute(HHID = HHID,
            hh_exp_annual = select(.,hh_exp1:hh_exp3) %>% rowSums(.,na.rm = T)) %>% 
  group_by(HHID) %>% 
  distinct(HHID, .keep_all = T) # Removes ~10 duplicated identical observations

hh <- left_join(hh, hh_exp, by = "HHID")

gen <- myanmar_main %>%
  transmute(HHID = HHID,
            admin1 = ID1_01,
            admin2 = ID1_02,
            strata = paste0(ID1_01, "_", ID1_04), # sample stratified by state and rural/urban
            psu = EACode,
            rural = as.numeric(ID1_04 == 2)
  ) %>%
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) # Removes ~10 duplicated identical observations

# Calculate electricity availability, consumption and expenditures
avail <- myanmar_main %>%
  transmute(HHID = HHID,
            availability_grid = C01b04b,
            availability_eve_grid = C01b05b,
            availability_mg = C02b04b,
            availability_eve_mg = C02b05b,
            availability_batt = C04b10,
            availability_solar = C05b03b,
            availability_eve_solar = C05b04b,
            availability_dg = C03c03b,
            availability_dg_eve = C03c04b,
            grid = as.numeric(C01f01 %in% c(1,2,3)),
            mg = as.numeric(C01f01 %in% c(4)),
            batt = as.numeric(C04a01 == 1),
            dg = as.numeric(C03a01 == 1),
            solar = as.numeric(select(.,C05a01__1:C05a01__3) %>% 
                               rowSums(.,na.rm = T) > 0)) %>%
  filter(HHID %in% hh$HHID) %>% 
  mutate_all(~naconvert(.))

avail <- avail %>%
  rowwise() %>%
  transmute(HHID = HHID,
            availability = max(availability_grid, availability_mg, availability_dg,
                               availability_solar, availability_batt, na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases were max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_mg, availability_dg_eve,
                               availability_eve_solar, availability_batt, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve),
            grid = grid, mg = mg, solar = solar, batt = batt, dg = dg)  %>% 
  distinct(HHID, .keep_all = T) %>%  # Removes ~10 duplicated identical observations
  mutate(availability = naconvert(availability),
         availability_eve = naconvert(availability_eve))
  
elexp <- myanmar_main %>% 
  select(
    HHID = HHID,
    exp_grid = C01a12,
    exp_mg = C02a19,
    exp_batt = C04b07) %>% 
  mutate(across(exp_grid:exp_batt, ~naconvert(.))) 

dgexp <- myanmar_main %>% 
  transmute(
    HHID = HHID,
    dg_rent = C03b05,
    dg_om = C03b07,
    dg_fuel = C03b11) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel + dg_rent) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- myanmar_solar %>% 
  transmute(
    HHID = HHID,
    solar_capex = C05a17,
    solar_payg = C05a19) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
                             solar_payg,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>%
  left_join(dgexp) %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) %>%  # Removes ~10 duplicated identical observations
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12)  

elcons <- myanmar_main %>% 
  mutate(across(c(C01a13,C02a20), ~natozero(.))
  ) %>% 
  transmute(
    HHID = HHID,
    kwh_consumption = select(.,c(C01a13,C02a20)) %>% rowSums(.,na.rm = T))  %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) # Removes ~10 duplicated identical observations

# Calculate AF services tier
apps <- myanmar_apps %>%
  select(HHID, Id, C2) %>%
  mutate(Id = as.character(Id)) %>%
  group_by(HHID, Id) %>%
  summarise(C2 = round(mean(C2, na.rm = T),0)) %>%
  ungroup()

apps <- apps %>%
  spread(Id, C2)

names(apps) <- make.names(names(apps),unique = TRUE)

apps <- apps %>% 
  transmute(
    HHID = HHID,
    srv_minimum = case_when(
      rowSums(select(., Compact.fluorescent.light.bulbs...16W.20W.:Compact.fluorescent.light.bulbs.51W.and.above,
                     Fluorescent.light.tubes..10.W.:Fluorescent.light.tubes..40.W.,
                     Incandescent.light.bulbs..25W.to.40W.:Incandescent.light.bulbs..76W.and.above.,
                     LED.light.bulbs...21W.to.30W.:LED.light.bulbs.51W.and.above), na.rm = T) > 0 &
        rowSums(select(., Regular.cell.phone, Smart.phone, Tablet), na.rm = T) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., B.W.TV, Flat.color.TV..LCD.LED., Regular.Color.TV,
                     Air.cooler, Fan, Air.conditioning, Refrigerator), na.rm = T) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., Freezer,
                     Micro.wave.oven, Electric.stove.range..oven), na.rm = T) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  )   %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) # Removes ~10 duplicated identical observations

# Extract fuel costs
fuel <- myanmar_main %>%
  mutate(HHID = as.character(HHID),
         lpgexp = ifelse(G01_01a == 1, G01_06,0),
         charexp = ifelse(G02_01a == 1, G02_06,0),
         woodexp = ifelse(G03_01a == 1, G03a04,0),
         biomexp = G04a04,
         briqexp = ifelse(G05_01a == 1, G05_06,0),
         coalexp = ifelse(G06_01a == 1, G06_06,0)
  ) %>% 
  mutate_all(~naconvert(.)
  ) %>%
  mutate_all(~natozero(.)
             ) %>% 
  summarise(HHID = HHID,
            cookexp_total = select(.,lpgexp:coalexp) %>% rowSums(.,na.rm = T)) %>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) # Removes ~10 duplicated identical observations

# Extract stated primary stove and classify whether clean (edge case selects solid)
stove <- myanmar_stove2 %>% 
  filter(H01_19c == 1) %>% 
  transmute(HHID = HHID,
            primarystovetype = ifelse(H01_19a %in% c(4,6,13,15), "clean", "solids")
  ) %>%
  arrange(HHID, desc(primarystovetype)) %>% 
  distinct(HHID, .keep_all = T) %>% 
  transmute(HHID = HHID,
            primarystoveclean_stated = as.numeric(primarystovetype == "clean")) %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) %>%  # Removes ~10 duplicated identical observations
  full_join(hh %>% select(HHID)) %>% 
  mutate_all(~natozero(.)) # Set households reporting NA to 0 due to survey design (conditional questions)

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- myanmar_stove2 %>% 
  mutate(HHID = HHID,
         across(H01_11a:H01_11c, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(H01_19a %in% c(4,6,13,15),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(H01_11a:H01_11c), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = H01_11a + H01_11b + H01_11c) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) %>%  # Removes duplicated identical observations
  full_join(hh %>% select(HHID)) %>% 
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months (proxy by stove use months due to survey)
fuelmths <- myanmar_stove2 %>% 
  filter(H01_19a %in% c(4,6,13,15)) %>% 
  mutate(across(H01_02_a__1:H01_02_a__13, ~naconvert(.))
         ) %>% 
  transmute(
    HHID = HHID,
    across(H01_02_a__1:H01_02_a__13, ~as.numeric(.>0))
  ) %>% 
  mutate(
    mthsfuelused = select(., H01_02_a__1:H01_02_a__12) %>% rowSums(na.rm = TRUE),
    useallyear = as.numeric(H01_02_a__13 == 1),
    mthsfuelused = ifelse(useallyear == 1, 12, mthsfuelused)
  ) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(mthsfuelused)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths))  %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T) %>%  # Removes duplicated identical observations
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- myanmar_main %>% 
  transmute(HHID = HHID,
            elcook = as.numeric(G07_01a == 1 | G07_01c == 1 | G07_01e == 1)
  )  %>% 
  filter(HHID %in% hh$HHID) %>% 
  distinct(HHID, .keep_all = T)  # Removes ~10 duplicated identical observations

myanmar <- left_join(hh, gen, by = c("HHID"))
myanmar <- left_join(myanmar, avail, by = c("HHID"))
myanmar <- left_join(myanmar, elcons, by = c("HHID"))
myanmar <- left_join(myanmar, elexp, by = c("HHID"))
myanmar <- left_join(myanmar, fuel, by = c("HHID"))
myanmar <- left_join(myanmar, fuelmths, by = c("HHID"))
myanmar <- left_join(myanmar, stove, by = c("HHID"))
myanmar <- left_join(myanmar, stove_af, by = c("HHID"))
myanmar <- left_join(myanmar, elcook, by = c("HHID"))
myanmar <- left_join(myanmar, apps, by = c("HHID")) 

myanmar <- myanmar %>% 
  mutate(Country = "Myanmar") %>% 
  select(names(rwanda))

# HONDURAS ---------------------------------------------------------------------

honduras_main <- read.dta13(here("Data", "honduras", "MTF_HN17_HH_clean.dta"),
                            convert.factors = F)

honduras_solar <- read.dta13(here("Data", "honduras", "MTF_HN17_C_solar_clean.dta"),
                            convert.factors = F)

honduras_fuel <- read.dta13(here("Data", "honduras", "MTF_HN17_H_clean.dta"),
                            convert.factors = F)

honduras_stove <- read.dta13(here("Data", "honduras", "MTF_HN17_I_ALL.dta"),
                             convert.factors = F)
gen <- honduras_main %>% 
  transmute(HHID = idhh,
            rural = as.numeric(urban == 0),
            admin1 = Department,
            admin2 = Municipality,
            strata = paste0(Department, "_", urban),# sample is stratified by region and rural/urban
            psu = psu_id,
            weight = weight)

hh <- honduras_main %>% 
  mutate(HHID = idhh) %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(natozero(as.numeric(sexhead == 0)), na.rm = T),
    educ10th = max(natozero(as.numeric(sechead == 1 | techead == 1 | higherhead == 1)), na.rm = T),
    nonfarmemp = max(natozero(as.numeric(k003 %in% c(1,3,4)))))

hh_exp <- honduras_main %>% 
  mutate(HHID = idhh) %>% 
  mutate(across(c(l001:l025), ~naconvert(.))
  ) %>% 
  select(HHID, l001:l025) %>% 
  summarise(
    HHID = HHID,
    hh_exp1 = ifelse(select(.,l001:l009) %>% rowSums(., na.rm = T) < l009a,
                     l009a,select(.,l001:l009) %>% rowSums(., na.rm = T)), 
    hh_exp2 = select(.,l010:l017) %>% rowSums(., na.rm = T), 
    hh_exp3 = select(.,l018:l025) %>% rowSums(., na.rm = T) # Annual G&S expenditure
  ) %>%
  mutate(hh_exp1 = hh_exp1*52, # Weekly consumption to annual
         hh_exp2 = hh_exp2*12 # Monthly G&S expenditure to annual
  ) %>% 
  transmute(HHID = HHID,
            hh_exp_annual = select(.,hh_exp1:hh_exp3) %>% rowSums(.,na.rm = T))

hh <- left_join(hh, hh_exp, by = "HHID")

# Calculate electricity availability, consumption and expenditures
avail <- honduras_main %>% 
  transmute(HHID = idhh,
            grid = natozero(as.numeric(c003 == 1)),
            mg = natozero(as.numeric(c047 == 1)),
            solar = natozero(as.numeric(c132 == 1)),
            dg = natozero(as.numeric(c085 == 1)),
            batt = natozero(as.numeric(c114 == 1)),
            availability_grid = c026a,
            availability_eve_grid = c027a,
            availability_mg = c072a,
            availability_eve_mg = c073a,
            availability_solar = c159a,
            availability_eve_solar = c160a,
            availability_dg = c108b,
            availability_eve_dg = c109b,
            availability_batt = c127,
            availability_eve_batt = c128
            ) %>% 
  mutate_all(~naconvert(.))

avail <- avail %>% 
  rowwise() %>% 
  transmute(HHID = HHID,
            grid = grid, mg = mg, solar = solar, dg = dg, batt = batt,
            availability = max(availability_grid, availability_mg, availability_solar,
                               availability_batt, availability_dg, na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases where max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_mg, availability_eve_solar,
                               availability_eve_batt, availability_eve_dg, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve))

elexp <- honduras_main %>% 
  transmute(
    HHID = idhh,
    exp_grid = c022,
    exp_mg = c066a,
    exp_batt = c125) %>% 
  mutate(across(exp_grid:exp_batt, ~naconvert(.)))

dgexp <- honduras_main %>% 
  transmute(
    HHID = idhh,
    dg_rent = c098,
    dg_om = c100,
    dg_fuel = c104) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel + dg_rent) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- honduras_solar %>% 
  transmute(
    HHID = idhh,
    solar_capex = c149,
    solar_payg = c151) %>% 
  mutate_all(~naconvert(.)) %>%
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
                             solar_payg,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>%
  left_join(dgexp) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12)  

elcons <- honduras_main %>% 
  mutate(across(c(c021,c067), ~naconvert(.))
  ) %>% 
  mutate(across(c(c021,c067), ~natozero(.))
  ) %>% 
  transmute(
    HHID = idhh,
    kwh_consumption = c021 + c067
  )

# Calculate AF services tier
apps <- honduras_main %>% 
  transmute(
    HHID = idhh,
    srv_minimum = case_when(
      rowSums(select(., m017, m018, m019, m020)) > 0 &
        rowSums(select(., m036, m037)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., m038a, m039a, m040a,
                     m024a, m030, m031, m025)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., m028,m026)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  )

# Extract fuel costs
fuel <- honduras_fuel %>%
  filter(h005 == 1, h002 != 4) %>%  # Filter electricity for cooking as captured already
  mutate(HHID = idhh,
         h014 = naconvert(h014)) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(h014, na.rm = T)) %>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  full_join(hh %>% select(HHID))

# Extract stated primary stove and classify whether clean
stove <- honduras_stove %>% 
  transmute(HHID = idhh,
            primarystoveclean_stated = 
               as.numeric((select(.,i004__4:i004__6) %>% rowSums(.,na.rm = T))>0)
  ) %>% 
  full_join(hh %>% select(HHID))

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- honduras_stove %>% 
  mutate(HHID = idhh,
         across(i024:i026, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(select(.,i004__4:i004__6) %>% 
                                        rowSums(.,na.rm = T)>0, "clean", "solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(i024:i026), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = i024 + i025 + i026) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>%
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- honduras_fuel %>%
  filter(h005 == 1, h002 %in%  c(1,4,11)) %>% 
  mutate(across(h010__1:h010__12, ~naconvert(.)),
         h010__111 = ifelse(h002 == 4, 1, h010__111)
  ) %>% 
  transmute(
    HHID = idhh,
    across(h010__1:h010__12, ~as.numeric(.>0)),
    h010__111 = natozero(h010__111)
  ) %>% 
  mutate(
    mthsfuelused = select(., h010__1:h010__12) %>% rowSums(na.rm = TRUE),
    useallyear = as.numeric(h010__111 == 1),
    mthsfuelused = ifelse(useallyear == 1, 12, mthsfuelused)
  ) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(mthsfuelused)) %>%  
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths))  %>% 
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- honduras_fuel %>% 
  mutate(HHID = idhh) %>% 
  filter(h002 == 4) %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(h005 == 1)) %>% 
  mutate(elcook = ifelse(elcook > 0 & !is.na(elcook), 1,0))  %>% 
  full_join(hh %>% select(HHID))

honduras <- left_join(hh, gen, by = c("HHID"))
honduras <- left_join(honduras, avail, by = c("HHID"))
honduras <- left_join(honduras, elcons, by = c("HHID"))
honduras <- left_join(honduras, elexp, by = c("HHID"))
honduras <- left_join(honduras, fuel, by = c("HHID"))
honduras <- left_join(honduras, fuelmths, by = c("HHID"))
honduras <- left_join(honduras, stove, by = c("HHID"))
honduras <- left_join(honduras, stove_af, by = c("HHID"))
honduras <- left_join(honduras, elcook, by = c("HHID"))
honduras <- left_join(honduras, apps, by = c("HHID"))

honduras <- honduras %>% 
  mutate(Country = "Honduras") %>% 
  select(names(rwanda))

# NEPAL ------------------------------------------------------------------------

nepal_main <- read.dta13(here("Data", "nepal", "maindataset.dta"), 
                            convert.factors = F)

nepal_weights <- read.dta13(here("Data", "nepal", "sample_weights.dta"), 
                         convert.factors = F)

nepal_hh <- read.dta13(here("Data", "nepal", "FamilyMembers.dta"), 
                          convert.factors = F)

nepal_exp <- read.dta13(here("Data", "nepal", "M_consumption.dta"), 
                       convert.factors = F)

nepal_solar <- read.dta13(here("Data", "nepal", "C_SolarBasedDevice.dta"), 
                             convert.factors = F)

nepal_apps <- read.dta13(here("Data", "nepal", "NLIST2_Appliance.dta"), 
                          convert.factors = F)

nepal_stove <- read.dta13(here("Data", "nepal", "J_Cookstove.dta"), 
                             convert.factors = F)

hh <- nepal_hh %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(as.numeric(A3 == 2 & A4 == 1), na.rm = T),
    educ10th = max(as.numeric(A9 %in% c(3:7)), na.rm = T),
    nonfarmemp = max(as.numeric(A14 %in% c(1,3,4)))) %>% 
  left_join(nepal_weights %>% select(HHID, weight = hh_weight))

hh_exp <- nepal_exp %>% 
  group_by(HHID) %>% 
  summarise(hh_exp1 = sum(M_A)*52) # Weekly consumption to annual

hh_exp <- nepal_main %>% 
  mutate(across(c(M12:M33), ~naconvert(.))
  ) %>% 
  select(HHID, M12:M33) %>% 
  summarise(
    HHID = HHID,
    hh_exp2 = select(.,M12:M19) %>% rowSums(., na.rm = T), 
    hh_exp3 = select(.,M20:M33) %>% rowSums(., na.rm = T) # Annual G&S expenditure
  ) %>%
  mutate(hh_exp2 = hh_exp2*12 # Monthly G&S expenditure to annual
  ) %>% 
  left_join(hh_exp) %>% 
  transmute(HHID = HHID,
            hh_exp_annual = select(.,hh_exp1:hh_exp3) %>% rowSums(.,na.rm = T))

hh <- left_join(hh, hh_exp, by = "HHID")

gen <- nepal_main %>% 
  transmute(HHID = HHID,
            admin1 = Province,
            admin2 = District,
            strata = paste0(Province,"_", LOCALITY), # assumes stratification at province by rural/urban
            psu = EA,
            rural = as.numeric(LOCALITY == 2)) %>% 
  filter(HHID %in% hh$HHID)

# Calculate electricity availability, consumption and expenditures
avail <- nepal_main %>% 
  transmute(HHID = HHID,
            grid = as.numeric(C2 == 1),
            mg = as.numeric(C44 %in% c(1,2,4)),
            solar = as.numeric(C135__2 == 1 | C135__3 == 1),
            batt = as.numeric(C115 == 1),
            dg = as.numeric(C84 == 1),
            availability_grid = C29_B,
            availability_eve_grid = C30_B,
            availability_mg = C69_B,
            availability_eve_mg = C70_B,
            availability_solar = C163_B,
            availability_eve_solar = C164_B,
            availability_dg = C107_B,
            availability_eve_dg = C108_B,
            availability_batt = C129,
            availability_eve_batt = C130) %>% 
  filter(HHID %in% hh$HHID) %>% 
  mutate(across(matches("availability"), 
                ~naconvert(.))) %>% 
  rowwise() %>% 
  transmute(HHID = HHID,
            grid = grid, mg = mg, solar = solar, batt = batt, dg = dg,
            availability = max(availability_grid, availability_mg, 
                               availability_solar, availability_batt,
                               availability_dg, na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases were max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_mg, 
                               availability_eve_solar, availability_eve_batt,
                               availability_eve_dg, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve))

elexp <- nepal_main %>% 
  transmute(
    HHID = HHID,
    exp_grid = C25,
    exp_mg = C63,
    exp_batt = C126) %>% 
  mutate(across(exp_grid:exp_batt, ~naconvert(.)))

dgexp <- nepal_main %>% 
  transmute(
    HHID = HHID,
    dg_rent = C97,
    dg_om = C99,
    dg_fuel = C101) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel + dg_rent) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- nepal_solar %>% 
  transmute(
    HHID = HHID,
    solar_capex = C154_FULL,
    solar_downpayment = C154_PARTIAL,
    solar_payg = C156) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
                             solar_payg + solar_downpayment,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>%
  left_join(dgexp) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12)  

elcons <- nepal_main %>% 
  transmute(
    HHID = HHID,
    kwh_consumption_grid = C24,
    kwh_consumption_mg = C64
  ) %>%
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  group_by(HHID) %>% 
  transmute(kwh_consumption = kwh_consumption_grid + kwh_consumption_mg)

# Calculate AF services tier
apps <- nepal_apps %>% 
  select(-N17_45) %>% 
  pivot_wider(id_cols = HHID, 
              names_from = Id, names_prefix = "app_", values_from = N17_45_A) %>% 
  mutate_all(~natozero(.)) %>% 
  transmute(
    HHID = HHID,
    srv_minimum = case_when(
      rowSums(select(., app_1:app_4)) > 0  &
        rowSums(select(., app_23:app_24)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., app_7, app_16, app_17, app_25:app_27, app_8)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., app_12, app_14)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  ) %>% 
  full_join(hh %>% select(HHID))

# Extract fuel costs
fuel <- nepal_stove %>%
  filter(J20_A != 12) %>%  # Filter electricity for cooking as captured already
  mutate(HHID = HHID,
         J37_A = natozero(naconvert(J37_A)),
         J37_B = natozero(naconvert(J37_B))) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(J37_A+J37_B)) %>% 
  mutate(cookexp_total = cookexp_total * 12)

# Extract stated primary stove and classify whether clean
stove <- nepal_stove %>% 
  filter(J34 == 1) %>% 
  transmute(
    HHID = HHID,
    primarystoveclean_stated = as.numeric(J20_A %in% c(1,12,14))
  ) %>% 
  group_by(HHID) %>% 
  summarise(primarystoveclean_stated = ifelse(
    sum(primarystoveclean_stated) > 0,1,0
  ))

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- nepal_stove %>% 
  mutate(across(J25:J27, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(J20_A %in% c(1,12,14),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(J25:J27), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = J25 + J26 + J27) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>%
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- nepal_stove %>% 
  filter(J20_A %in% c(1,12,14)) %>% 
  mutate(across(J15__1:J15__12, ~naconvert(.))
         ) %>% 
  transmute(
    HHID = HHID,
    across(J15__1:J15__12, ~as.numeric(.>0)),
    J15__111 = natozero(J15__111)
  ) %>% 
  mutate(
    mthsfuelused = select(., J15__1:J15__12) %>% rowSums(na.rm = TRUE),
    useallyear = as.numeric(J15__111 == 1),
    mthsfuelused = ifelse(useallyear == 1, 12, mthsfuelused)
  ) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(mthsfuelused)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths)) %>% 
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- nepal_stove %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(J20_A == 12, na.rm = T)) %>% 
  mutate(elcook = natozero(elcook))

nepal <- left_join(hh, gen, by = c("HHID"))
nepal <- left_join(nepal, avail, by = c("HHID"))
nepal <- left_join(nepal, elcons, by = c("HHID"))
nepal <- left_join(nepal, elexp, by = c("HHID"))
nepal <- left_join(nepal, fuel, by = c("HHID"))
nepal <- left_join(nepal, fuelmths, by = c("HHID"))
nepal <- left_join(nepal, stove, by = c("HHID"))
nepal <- left_join(nepal, stove_af, by = c("HHID"))
nepal <- left_join(nepal, elcook, by = c("HHID"))
nepal <- left_join(nepal, apps, by = c("HHID"))

nepal <- nepal %>% 
  mutate(Country = "Nepal") %>% 
  select(names(rwanda))

# KENYA ------------------------------------------------------------------------

kenya_main <- read.dta13(here("Data", "kenya", "MTF_HH_Core_Survey_Final_Data_trimmed-2.dta"), 
                             convert.factors = F)

kenya_weights <- read.dta13(here("Data", "kenya", "weight.dta"),
                            convert.factors = F)

kenya_hh <- read.dta13(here("Data", "kenya", "MTF_HH_Roster.dta"), 
                       convert.factors = F)

kenya_solar <- read.dta13(here("Data", "kenya", "MTF_HH_Solar_Roster.dta"), 
                       convert.factors = F)

kenya_apps <- read.dta13(here("Data", "kenya", "MTF_HH_Sec.M3_Asset_Data_Final.dta"), 
                          convert.factors = F)

kenya_stove <- read.dta13(here("Data", "kenya", "MTF_HH_Cooking_Data_Final.dta"), 
                         convert.factors = F)

gen <- kenya_weights %>% 
  filter(class == 1) %>% 
  transmute(HHID = PARENT_KEY,
            admin1 = prov,
            admin2 = dist,
            rural = as.numeric(locality == 2),
            strata = paste0(prov,"_",locality), # sample is stratified by region and rural/urban
            psu = cluuq,
            weight = pw_final)

hh <- kenya_hh %>%
  mutate(HHID = PARENT_KEY) %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(as.numeric(a_4_rel_hhh == 1 & a_3_mf == 2), na.rm = T),
    educ10th = max(as.numeric(a_9a_sch_lev %in% c(3:7)), na.rm = T),
    nonfarmemp = max(as.numeric(a_14_main_occ %in% c(1,3,4))))

hh <- inner_join(hh, gen)

hh_exp <- kenya_main %>% 
  mutate(across(c(l_l_1:l_l_33), ~naconvert(.))
  ) %>% 
  select(PARENT_KEY, l_l_1:l_l_33) %>% 
  summarise(
    HHID = PARENT_KEY,
    hh_exp1 = select(.,l_l_1:l_l_11) %>% rowSums(., na.rm = T), 
    hh_exp2 = select(.,l_l_12:l_l_19) %>% rowSums(., na.rm = T), 
    hh_exp3 = select(.,l_l_20:l_l_33) %>% rowSums(., na.rm = T) # Annual G&S expenditure
  ) %>%
  mutate(hh_exp1 = hh_exp1*52, # Weekly consumption to annual
         hh_exp2 = hh_exp2*12 # Monthly G&S expenditure to annual
  ) %>% 
  transmute(HHID = HHID,
            hh_exp_annual = select(.,hh_exp1:hh_exp3) %>% rowSums(.,na.rm = T)) %>% 
  filter(HHID %in% hh$HHID)

hh <- left_join(hh, hh_exp, by = "HHID")

# Calculate electricity availability, consumption and expenditures
avail <- kenya_main %>%
  transmute(HHID = PARENT_KEY,
            availability_grid = c_c_25bi,
            availability_eve_grid = c_c_26b,
            availability_solar = c_c_147b_Typicalmonth,
            availability_eve_solar = c_c_148b_Typicalmonth,
            availability_batt = c_c_118,
            availability_dg = c_c_98B_typicalmonth,
            availability_eve_dg = c_c_99B_typicalmonth,
            grid = as.numeric(c_c_2 == 1),
            mg = as.numeric(c_c_38 == 1),
            batt = as.numeric(c_c_104 == 1),
            dg = as.numeric(c_c_75 == 1),
            solar = as.numeric(c_c_q122_1 == 1 | c_c_q122_2 == 1 | c_c_q122_3 == 1)
  ) %>%
  mutate_all(~naconvert(.)) %>%
  rowwise() %>%
  transmute(HHID = HHID,
            availability = max(availability_grid, availability_solar, 
                               availability_batt, availability_dg, na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases where max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_solar, 
                                   availability_batt, availability_dg, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve),
            grid = grid,
            mg = mg,
            solar = natozero(solar),
            batt = batt,
            dg = dg) %>% 
  filter(HHID %in% hh$HHID)

elexp <- kenya_main %>% 
  transmute(
    HHID = PARENT_KEY,
    exp_grid = c_c_20,
    exp_mg = 0,
    exp_batt = c_c_115) %>% 
  mutate(across(exp_grid:exp_batt, ~naconvert(.)))

dgexp <- kenya_main %>% 
  transmute(
    HHID = PARENT_KEY,
    dg_rent = c_c_88,
    dg_om = c_c_90,
    dg_fuel = c_c_94) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel + dg_rent) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- kenya_solar %>% 
  select(
    HHID = PARENT_KEY,
    solar_capex = c_139_uf_cost,
    solar_payg = c_141_mm_cost) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
                             solar_payg,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>% 
  left_join(dgexp) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_solar) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12)  

elcons <- kenya_main %>% 
  transmute(
    HHID = PARENT_KEY,
    kwh_consumption = naconvert(c_c_21)) %>% 
  filter(HHID %in% hh$HHID)

# Calculate AF services tier
apps <- kenya_main %>% 
  select(HHID = PARENT_KEY, m_m_3_group) %>% 
  splitstackshape::cSplit_e("m_m_3_group", sep = " ", fill = 0, type = "character") %>% 
  select(HHID,m_m_3_group,asset_bulb = m_m_3_group_15, asset_flurobulb = m_m_3_group_16, 
         asset_cflbulb = m_m_3_group_17, asset_led = m_m_3_group_18,
         asset_bwtv = m_m_3_group_36, asset_regtv = m_m_3_group_37,
         asset_flatv = m_m_3_group_38, asset_fan = m_m_3_group_22,
         asset_cooler = m_m_3_group_28, asset_fridge = m_m_3_group_23,
         asset_wmchne = m_m_3_group_26, asset_mwave = m_m_3_group_24) %>% 
  transmute(
    HHID = HHID,
    srv_minimum = case_when(
      rowSums(select(., asset_bulb:asset_led)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., asset_bwtv:asset_flatv, asset_fan, 
                     asset_cooler, asset_fridge)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., asset_wmchne, asset_mwave)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract fuel costs
fuel <- kenya_main %>%
  select(HHID = PARENT_KEY, matches("h_h_5"), matches("h_h_14")) %>%
  pivot_longer(cols = -HHID, names_pattern = "(.+\\d+)(\\w)", 
               names_to = c(".value", "set")) %>% 
  filter(h_h_5 == 1) %>% 
  mutate_all(~naconvert(.)) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(h_h_14, na.rm = T)) %>%  
  mutate(cookexp_total = cookexp_total * 12) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract stated primary stove and classify whether clean
stove <- kenya_main %>% 
  transmute(HHID = PARENT_KEY,
            primarystoveclean_stated = as.numeric(LPG_any == 1)) %>% # Assume if HH uses LPG, it is the main stove
  filter(HHID %in% hh$HHID)

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- kenya_stove %>% 
  mutate(HHID = PARENT_KEY,
         across(i_23_hrs_morning:i_25_hrs_evening, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(i_18_a_1st_fuel %in% c(13:16),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(i_23_hrs_morning:i_25_hrs_evening), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = i_23_hrs_morning + i_24_hrs_afternoon + i_25_hrs_evening) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>%
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- kenya_main %>% 
  transmute(HHID = PARENT_KEY,
            fuela = ifelse(h_h_5a == 1, naconvert(h_h_10a_count),0)
            ) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(fuela, na.rm = T)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths)) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- kenya_stove %>% 
  mutate(HHID = PARENT_KEY) %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(i_18_a_1st_fuel == 16)) %>% 
  mutate(elcook = ifelse(elcook > 0 & !is.na(elcook), 1,0)) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

kenya <- left_join(hh, avail, by = c("HHID"))
kenya <- left_join(kenya, elcons, by = c("HHID"))
kenya <- left_join(kenya, elexp, by = c("HHID"))
kenya <- left_join(kenya, fuel, by = c("HHID"))
kenya <- left_join(kenya, fuelmths, by = c("HHID"))
kenya <- left_join(kenya, stove, by = c("HHID"))
kenya <- left_join(kenya, stove_af, by = c("HHID"))
kenya <- left_join(kenya, elcook, by = c("HHID"))
kenya <- left_join(kenya, apps, by = c("HHID"))

kenya <- kenya %>% 
  mutate(Country = "Kenya"
  ) %>% 
  select(names(rwanda))

# NIGER ------------------------------------------------------------------------

niger_main <- read.dta13(here("Data", "niger", "MTF_HH_main.dta"), 
                         convert.factors = F)

niger_elec <- read.dta13(here("Data", "niger", "MTF_Lighting.dta"), 
                         convert.factors = F)

niger_hh <- read.dta13(here("Data", "niger", "MTF_HH_Roaster.dta"), 
                       convert.factors = F)

niger_solar <- read.dta13(here("Data", "niger", "MTF_Solar_products.dta"), 
                          convert.factors = F)

niger_fuel <- read.dta13(here("Data", "niger", "MTF_Fuels.dta"), 
                         convert.factors = F)

niger_stove <- read.dta13(here("Data", "niger", "MTF_Cooking_Solution.dta"), 
                          convert.factors = F)

hh <- niger_hh %>%
  mutate(HHID = HH_ID) %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(natozero(as.numeric(A4 == 1 & A3 == 2)), na.rm = T),
    educ10th = max(natozero(as.numeric(A8 >= 10)), na.rm = T),
    nonfarmemp = max(natozero(as.numeric(A14 %in% c(1,3,4)))))

gen <- niger_elec %>% 
  transmute(HHID = HH_ID,
            admin1 = region,
            admin2 = departement,
            rural = as.numeric(Q5 == 2),
            strata = paste0(region, "_", Q5), # sample is stratified by region and rural/urban
            psu = village,
            weight = pond_menage_R) %>% 
  filter(!is.na(weight)) %>% 
  distinct(HHID, .keep_all = T) # Remove duplicates

hh_exp <- niger_main %>% 
  mutate(across(c(p1a11, p2a11, p3a11, p4a11, p5a11, p6a11, p7a11, p8a11,
                  p9a11, p10a11, p11a11, p12a11, p14:p36), ~naconvert(.))
  ) %>% 
  select(HHID = HH_ID,
         c(p1a11, p2a11, p3a11, p4a11, p5a11, p6a11, p7a11, p8a11,
           p9a11, p10a11, p11a11, p12a11), p14:p36) %>% 
  mutate_all(~naconvert(.)) %>% 
  summarise(
    HHID = HHID,
    hh_exp1 = select(.,p1a11:p12a11) %>% rowSums(., na.rm = T), 
    hh_exp2 = select(.,p14:p22) %>% rowSums(., na.rm = T), 
    hh_exp3 = select(.,p23:p36) %>% rowSums(., na.rm = T) # Annual G&S expenditure
  ) %>%
  mutate(hh_exp1 = hh_exp1*52, # Weekly consumption to annual
         hh_exp2 = hh_exp2*12 # Monthly G&S expenditure to annual
  ) %>% 
  transmute(HHID = HHID,
            hh_exp_annual = select(.,hh_exp1:hh_exp3) %>% rowSums(.,na.rm = T)) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) 

hh <- left_join(hh, hh_exp, by = "HHID") 

# Calculate electricity availability, consumption and expenditures
avail <- niger_main %>%
  transmute(HHID = HH_ID,
            availability_grid = C27,
            availability_eve_grid = C28,
            availability_mg = C64,
            availability_eve_mg = C65,
            availability_solar = C151,
            availability_eve_solar = C152,
            availability_dg = C93,
            availability_eve_dg = C94,
            availability_batt = C110,
            availability_eve_batt = C111,
            grid = as.numeric(C3 == 1),
            mg = as.numeric(C40 == 1),
            solar = as.numeric(C1191 == 1 | C1192 == 1 | C1193 == 1),
            batt = as.numeric(C101 == 1),
            dg = as.numeric(C77 == 1)
  ) %>%
  mutate_all(~naconvert(.)) %>%
  rowwise() %>%
  transmute(HHID = HHID,
            availability = max(c(availability_grid, availability_mg, 
                                 availability_solar, availability_batt, 
                                 availability_dg), na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases were max(..) returns -Inf as no modern electricity access
            availability_eve = max(c(availability_eve_grid, availability_eve_mg, 
                                 availability_eve_solar, availability_eve_batt,
                                 availability_eve_dg), na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve),
            grid = grid,
            mg = mg,
            solar = solar,
            dg = dg,
            batt = batt) %>% 
  filter(HHID %in% hh$HHID)%>% 
  full_join(hh %>% select(HHID))

elexp <- niger_main %>% 
  transmute(
    HHID = HH_ID,
    exp_grid = C24,
    exp_mg = C61,
    exp_batt = C108) %>% 
  mutate(across(c(exp_grid,exp_batt), ~naconvert(.))) %>% 
  filter(HHID %in% hh$HHID)

dgexp <- niger_main %>% 
  transmute(
    HHID = HH_ID,
    dg_om = C87,
    dg_fuel = C91) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- niger_solar %>% 
  select(
    HHID = HH_ID,
    solar_capex = C138) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>% 
  left_join(dgexp) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12) 

elcons <- niger_main %>% 
  transmute(
    HHID = HH_ID,
    kwh_consumption = natozero(naconvert(C23)) + natozero(naconvert(C60))) %>% 
  filter(HHID %in% hh$HHID)

# Calculate AF services tier
apps <- niger_main %>% 
  mutate(across(Kb1:Kb30, ~natozero(.))) %>% 
  transmute(
    HHID = HH_ID,
    srv_minimum = case_when(
      rowSums(select(., Kb1:Kb4)) > 0 &
        rowSums(select(., Kb25:Kb26)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., Kb27:Kb29, Kb8, Kb18:Kb19, Kb9)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., Kb10, Kb15, Kb16)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract fuel costs
fuel <- niger_fuel %>%
  mutate(H16 = naconvert(H16)) %>% 
  transmute(HHID = HH_ID,
            exp_cook = H16*4) %>% 
  filter(HHID %in% hh$HHID) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(exp_cook, na.rm = T))%>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  full_join(hh %>% select(HHID)) 

# Extract stated primary stove and classify whether clean
stove <- niger_stove %>% 
  filter(I33 == 1) %>% 
  transmute(HHID = HH_ID,
            primarystoveclean_stated = as.numeric(I17_1 %in% c(1,12))) %>% 
  filter(HHID %in% hh$HHID)

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- niger_stove %>% 
  mutate(HHID = HH_ID,
         across(I22a:I24, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(I17_1 %in% c(1,12),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(I22a:I24), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = I22a + I23 + I24) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>%
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- niger_fuel %>% 
  filter(H3 %in% c(1,12), H4_102 == 1) %>% 
  mutate(HHID = HH_ID) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(naconvert(H11), na.rm = T)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths)) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- niger_fuel %>% 
  filter(H3 %in% c(12)) %>% 
  mutate(HHID = HH_ID) %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(H4_102 == 1)) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID))

niger <- left_join(hh, gen, by = c("HHID")) %>% 
  filter(!is.na(weight))
niger <- left_join(niger, avail, by = c("HHID"))
niger <- left_join(niger, elcons, by = c("HHID"))
niger <- left_join(niger, elexp, by = c("HHID"))
niger <- left_join(niger, fuel, by = c("HHID"))
niger <- left_join(niger, fuelmths, by = c("HHID"))
niger <- left_join(niger, stove, by = c("HHID"))
niger <- left_join(niger, stove_af, by = c("HHID"))
niger <- left_join(niger, elcook, by = c("HHID"))
niger <- left_join(niger, apps, by = c("HHID"))

niger <- niger %>% 
  mutate(Country = "Niger"
  ) %>% 
  select(names(rwanda))

# STP --------------------------------------------------------------------------

stp_main <- read.dta13(here("Data", "saotomeandprincipe", "stp_main.dta"), 
                         convert.factors = F)

hh <- stp_main %>% 
  select(HHID = HHID_region_district_locality, gender_1:econactivity_days_13) %>% 
  mutate(across(gender_1:econactivity_days_13, ~as.character(.))) %>% 
  pivot_longer(cols = -HHID, names_pattern = "(.+)_(\\d+)", 
               names_to = c(".value", "set")) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(as.numeric(gender == 2 & relation_head == 1), na.rm = T),
    educ10th = max(as.numeric(grade_feq %in% c(11:16)), na.rm = T),
    nonfarmemp = max(as.numeric(mainoccupation %in% c(1,3,4)))) %>% 
  mutate(
    hh_exp_annual = NA # Expenditures not provided in STP dataset
  )

gen <- stp_main %>% 
  transmute(HHID = HHID_region_district_locality,
            admin1 = region,
            admin2 = district,
            strata = paste0(region,"_", environment), # assumes stratification at province by rural/urban
            psu = locality,
            rural = as.numeric(environment == "Rural"),
            weight = final_weight
  ) %>% 
  filter(weight != " ")

# Calculate electricity availability, consumption and expenditures
avail <- stp_main %>% 
  transmute(HHID = HHID_region_district_locality,
            grid = as.numeric(energy_source == 1),
            mg = as.numeric(energy_source == 2),
            solar = as.numeric(energy_source %in% c(6,7,8)),
            batt = as.numeric(energy_source == 4),
            dg = as.numeric(energy_source == 3),
            availability_grid = nathours_day,
            availability_eve_grid = nathours_night,
            availability_mg = minihours_day,
            availability_eve_mg = minihours_night,
            availability_solar = solarhours_day,
            availability_eve_solar = solarhours_night,
            availability_batt = bathours_day,
            availability_eve_batt = bathours_night,
            availability_dg = genhours_day,
            availability_eve_dg = genhours_night) %>% 
  filter(HHID %in% hh$HHID) %>% 
  mutate_all(~naconvert(.)) %>% 
  rowwise() %>% 
  transmute(HHID = HHID,
            grid = grid, mg = mg, solar = solar, batt = batt, dg = dg,
            availability = max(availability_grid, availability_mg, 
                               availability_solar, availability_batt, 
                               availability_dg, na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases were max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_mg, 
                               availability_eve_solar, availability_eve_batt, 
                               availability_eve_dg, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve))

elexp <- stp_main %>% 
  transmute(
    HHID = HHID_region_district_locality,
    exp_grid = natpaid_hh,
    exp_mg = minipaid_hh,
    exp_batt = charging_bathh) %>%
  mutate(across(exp_grid:exp_batt, ~naconvert(.)))

dgexp <- stp_main %>% 
  transmute(
    HHID = HHID_region_district_locality,
    dg_rent = p_gen,
    dg_om = maintain_gen,
    dg_fuel = paid_fuel) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = dg_om/12 + dg_fuel + dg_rent) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- stp_main %>% 
  transmute(
    HHID = HHID_region_district_locality,
    solar_capex = fullhalf_psolar,
    solar_payg = p_solar) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
                             solar_payg,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>%
  left_join(dgexp) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12) 

elcons <- stp_main %>% 
  transmute(
    HHID = HHID_region_district_locality,
    kwh_consumption_grid = natconsumption_hh,
    kwh_consumption_mg = miniconsumption_hh
  ) %>%
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  group_by(HHID) %>% 
  transmute(kwh_consumption = kwh_consumption_grid + kwh_consumption_mg)

# Calculate AF services tier
apps <- stp_main %>% 
  select(HHID = HHID_region_district_locality,
         k1:k35) %>% 
  mutate_all(~natozero(.)) %>% 
  transmute(
    HHID = HHID,
    srv_minimum = case_when(
      rowSums(select(., k1:k4)) > 0  &
        rowSums(select(., k28:k29)) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., k12, k21, k30:k32, k15)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., k16:k17, k19)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  ) %>% 
  full_join(hh %>% select(HHID))

# # Extract fuel costs
fuel <- stp_main %>%
  select(HHID = HHID_region_district_locality, matches("fuelcode"),
         matches("p_dfuel"), matches("purpose_dfuel_2_"), -matches("unit")) %>% 
  pivot_longer(cols = -HHID, names_pattern = "(.+)_(\\d+)", 
               names_to = c(".value", "set")) %>% 
  mutate_all(~naconvert(.)) %>% 
  filter(fuelcode != 0, purpose_dfuel_2 == 1) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(p_dfuel, na.rm = T)) %>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  full_join(hh %>% select(HHID))
  
# Extract stated primary stove and classify whether clean
stove <- stp_main %>% 
  select(HHID = HHID_region_district_locality, matches("fuel1st_stove_"),
         matches("mainuse")) %>% 
  pivot_longer(cols = -HHID, names_pattern = "(.+)_(\\d+)", 
               names_to = c(".value", "set")) %>% 
  filter(mainuse_stove == 1) %>% 
  group_by(HHID) %>% 
  summarise(primarystoveclean_stated = max(as.numeric(fuel1st_stove %in% c(1,11,13)))) %>% 
  full_join(hh %>% select(HHID)) 

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- stp_main %>% 
  select(HHID = HHID_region_district_locality, matches("fuel1st_stove_"),
         matches("morning_stove"), matches("afternoon_stove"), matches("night_stove")) %>% 
  pivot_longer(cols = -HHID, names_pattern = "(.+)_(\\d+)", 
               names_to = c(".value", "set")) %>% 
  mutate(primarystovetype_af = ifelse(fuel1st_stove %in% c(1,11,13),"clean","solids"),
         across(morning_stove:night_stove, ~naconvert(.))
         ) %>%
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(morning_stove:night_stove), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = morning_stove+afternoon_stove+night_stove) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>%
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months
fuelmths <- stp_main %>%
  select(HHID = HHID_region_district_locality, matches("fuelcode"),
         matches("monthuse_dfuel"), matches("purpose_dfuel_2_")) %>% 
  pivot_longer(cols = -HHID, names_pattern = "(.+)_(\\d+)", 
               names_to = c(".value", "set")) %>% 
  mutate_all(~naconvert(.)) %>% 
  filter(fuelcode %in% c(1,6,13), purpose_dfuel_2 == 1) %>% 
  mutate(mthsfuelused = ifelse(monthuse_dfuel > 12,12,monthuse_dfuel)) %>% 
  group_by(HHID) %>% 
  summarise(cleanfuelmths = sum(mthsfuelused, na.rm = T)) %>% 
  mutate(cleanfuelmths = ifelse(cleanfuelmths > 12,12,cleanfuelmths)) %>% 
  full_join(hh %>% select(HHID))

# Extract if electricity used for cooking
elcook <- stp_main %>% 
  select(HHID = HHID_region_district_locality, matches("fuel1st_stove_"),
         matches("mainuse")) %>% 
  pivot_longer(cols = -HHID, names_pattern = "(.+)_(\\d+)", 
               names_to = c(".value", "set")) %>% 
  group_by(HHID) %>% 
  summarise(elcook = max(as.numeric(fuel1st_stove %in% c(13))))

stp <- left_join(gen, hh, by = c("HHID"))
stp <- left_join(stp, avail, by = c("HHID"))
stp <- left_join(stp, elcons, by = c("HHID"))
stp <- left_join(stp, elexp, by = c("HHID"))
stp <- left_join(stp, fuel, by = c("HHID"))
stp <- left_join(stp, fuelmths, by = c("HHID"))
stp <- left_join(stp, stove, by = c("HHID"))
stp <- left_join(stp, stove_af, by = c("HHID"))
stp <- left_join(stp, elcook, by = c("HHID"))
stp <- left_join(stp, apps, by = c("HHID"))

stp <- stp %>% 
  mutate(Country = "STP") %>% 
  select(names(rwanda))

# ZAMBIA -----------------------------------------------------------------------

zambia_main <- read.dta13(here("Data", "zambia", "EXPRESS SECTIONS.dta"), 
                             convert.factors = F)

zambia_hh <- read.dta13(here("Data", "zambia", "SECTION A  HOUSEHOLD ROSTER.dta"), 
                          convert.factors = F)

zambia_weights <- read.dta13(here("Data", "zambia", "sample_weight.dta"))

zambia_exp <- read.dta13(here("Data", "zambia", "SECTION L CONSUMPTION_EXPENDITURE.dta"), 
                         convert.factors = F)

zambia_avail <- read.dta13(here("Data", "zambia", "SECTION C SUPPLY AND DEMAND FOR ELECTRICTY C120-C123.dta"), 
                         convert.factors = F)

zambia_solar <- read.dta13(here("Data", "zambia", "SECTION C SOLAR DEVICES.dta"), 
                           convert.factors = F)

zambia_apps <- read.dta13(here("Data", "zambia", "SECTION N HOUSEHOLD ASSETS ELECTRIC.dta"), 
                           convert.factors = F)

zambia_fuel <- read.dta13(here("Data", "zambia", "SECTION H HOUSEHOLD FUEL CONSUMPTION H1-H18.dta"), 
                          convert.factors = F)

zambia_stove <- read.dta13(here("Data", "zambia", "SECTION I USE OF COOKSTOVES.dta"), 
                          convert.factors = F)

gen <- zambia_main %>%
  transmute(HHID = HouseholdID,
            admin1 = PROVINCE,
            admin2 = paste0(PROVINCE, "_", DISTRICT),
            rural = as.numeric(LOCALITY == 2),
            strata = paste0(PROVINCE,"_",LOCALITY), # sample is stratified by region and rural/urban
            psu = EANUM) %>% 
  left_join(zambia_weights %>% select(HHID = HouseholdID, weight = hh_weight)) %>% 
  filter(!is.na(weight))

hh <- zambia_hh %>%
  mutate(HHID = HouseholdID) %>% 
  group_by(HHID) %>% 
  summarise(
    femalehead = max(natozero(as.numeric(A3 == 2 & A4 == 1)), na.rm = T),
    educ10th = max(natozero(as.numeric(A9 %in% c(3:7))), na.rm = T),
    nonfarmemp = max(natozero(as.numeric(A14 %in% c(1,3,4)))))

hh_exp <- zambia_exp %>% 
  mutate(
    HHID = HouseholdID,
    type = case_when(
    LITEM %in% c(1:13) ~ 52, # weekly expenditures
    LITEM %in% c(14:22) ~ 12, # monthly expenditures
    LITEM %in% c(23:36) ~ 1) # annual expenditures
  ) %>% 
  group_by(HHID, type) %>% 
  summarise(exp = sum(naconvert(L2A), na.rm = T)) %>% 
  mutate(exp_final = exp*type) %>% 
  group_by(HHID) %>% 
  summarise(hh_exp_annual = sum(exp_final))

hh <- left_join(hh, hh_exp, by = "HHID")

# Calculate electricity availability, consumption and expenditures
avail <- zambia_main %>%
  transmute(HHID = HouseholdID,
            availability_grid = C28B,
            availability_eve_grid = C29B,
            availability_mg = C68B,
            availability_eve_mg = C69B,
            availability_solar = C168B,
            availability_eve_solar = C169B,
            availability_batt = C128,
            availability_eve_batt = C129,
            availability_dg = C106B,
            availability_eve_dg = C107B,
            grid = as.numeric(C2 == 1),
            mg = as.numeric(C43 == 1),
            solar = as.numeric(natozero(C136) > 0 | natozero(C137) > 0 | 
                                 natozero(C138) > 0),
            batt = as.numeric(C114 == 1),
            dg = as.numeric(C83 == 1)
  ) %>%
  mutate_all(~naconvert(.))

avail <- avail %>%
  rowwise() %>%
  transmute(HHID = HHID,
            availability = max(availability_grid, availability_mg,
                               availability_solar, availability_batt, 
                               availability_dg, na.rm = T),
            availability = ifelse(availability < 0, 0, availability), # Cases were max(..) returns -Inf as no modern electricity access
            availability_eve = max(availability_eve_grid, availability_eve_mg,
                               availability_eve_solar, availability_eve_batt, 
                               availability_eve_dg, na.rm = T),
            availability_eve = ifelse(availability_eve < 0, 0, availability_eve),
            grid = grid,
            mg = mg,
            solar = solar,
            batt = batt,
            dg = dg)

elexp <- zambia_main %>% 
  transmute(
    HHID = HouseholdID,
    exp_grid = C22,
    exp_mg = C63) %>% 
  mutate(across(exp_grid:exp_mg, ~naconvert(.)))

dgexp <- zambia_main %>% 
  transmute(
    HHID = HouseholdID,
    dg_capex = C97,
    dg_rent = C96,
    dg_om = C98,
    dg_fuel = C102) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_dg = round(annuity.instalment(0.1, n.periods = 5, pv = dg_capex)/12 + 
                          dg_om/12 + dg_fuel + dg_rent,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_dg = sum(exp_dg, na.rm = T)) %>% 
  ungroup()

solarexp <- zambia_solar %>% 
  select(
    HHID = HouseholdID,
    solar_capex = C156,
    solar_downpayment = C157,
    solar_payg = C161) %>% 
  mutate_all(~naconvert(.)) %>% 
  mutate_all(~natozero(.)) %>% 
  mutate(exp_solar = round(annuity.instalment(0.1, n.periods = 5, pv = solar_capex)/12 + 
                             solar_payg + solar_downpayment,0)) %>% 
  group_by(HHID) %>% 
  summarise(exp_solar = sum(exp_solar, na.rm = T)) %>% 
  ungroup()

elexp <- left_join(elexp, solarexp) %>%
  left_join(dgexp) %>% 
  transmute(HHID = HHID,
            elexp_total = select(.,exp_grid:exp_dg) %>% rowSums(., na.rm = T),
            elexp_total = elexp_total * 12) 

elcons <- zambia_main %>% 
  mutate(across(c(C23,C64), ~naconvert(.))
  ) %>% 
  transmute(
    HHID = HouseholdID,
    kwh_consumption = select(.,c(C23,C64)) %>% rowSums(.,na.rm = T))

# Calculate AF services tier
apps <-zambia_apps %>%
  transmute(HHID = HouseholdID, 
            NITEM = paste0("app_", NITEM), 
            N1A = as.numeric(natozero(naconvert(N1B))>0)) %>%
  spread( NITEM, N1A) %>% 
  mutate_all(~natozero(naconvert(.)))

apps <- apps %>% 
  transmute(
    HHID = HHID,
    srv_minimum = case_when(
      rowSums(select(., app_1, app_2, app_3, app_4), na.rm = T) > 0 &
        rowSums(select(., app_25, app_26), na.rm = T) > 0 ~ 1,
      TRUE ~ 0),
    srv_decent = case_when(
      rowSums(select(., app_27,app_28, app_29, app_8, 
                     app_18, app_19, app_9)) > 0 ~ 1,
      TRUE ~ 0),
    srv_affluent = case_when(
      rowSums(select(., app_10, app_15, app_16)) > 0 ~ 1,
      TRUE ~ 0)
  ) %>% 
  mutate(
    srv_minimum = as.numeric(
      srv_minimum == 1 & rowSums(select(., srv_decent:srv_affluent)) == 0),
    srv_decent = as.numeric(
      srv_decent == 1 & srv_affluent == 0),
    srv_affluent = srv_affluent,
    srv_none = as.numeric(rowSums(select(., srv_minimum:srv_affluent)) == 0)
  ) %>% 
  full_join(gen %>% select(HHID))

# Extract fuel costs
fuel <- zambia_fuel %>%
  filter(grepl(H4, pattern = "B"), H2 != 14) %>%  # Filter electricity for cooking as captured already
  mutate(HHID = HouseholdID,
         H16 = naconvert(H16)) %>% 
  group_by(HHID) %>% 
  summarise(cookexp_total = sum(H16, na.rm = T)) %>% 
  mutate(cookexp_total = cookexp_total * 12) %>% 
  full_join(gen %>% select(HHID))

# Extract stated primary stove and classify whether clean
stove <- zambia_stove %>% 
  mutate(HHID = HouseholdID,
         primarystovetype = as.numeric(I19A %in% c(1,7,14,16) & I33 == 1)
  ) %>% 
  group_by(HHID) %>% 
  summarise(primarystoveclean_stated = max(primarystovetype)) %>% 
  full_join(gen %>% select(HHID))

# Extract primary stove by timeuse (AF) (selects solidfuel stove in edge case)
stove_af <- zambia_stove %>% 
  mutate(HHID = HouseholdID,
         across(I24:I26, ~naconvert(.))) %>% 
  mutate(primarystovetype_af = ifelse(I19A %in% c(1,7,14,16),"clean","solids")
  ) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  summarise(across(c(I24:I26), ~sum(.,na.rm = T))) %>% 
  group_by(HHID, primarystovetype_af) %>% 
  mutate(stoveuseminutes = I24 + I25 + I26) %>% 
  pivot_wider(id_cols = HHID, names_from = primarystovetype_af,
              values_from = stoveuseminutes, names_prefix = "stoveuseminutes_") %>% 
  mutate(across(matches("stoveuseminutes"), ~natozero(.))
  ) %>% 
  filter(HHID %in% hh$HHID) %>% 
  full_join(hh %>% select(HHID)) %>%
  rowwise() %>% 
  mutate(
    stoveuseminutes_clean = 
      ifelse((stoveuseminutes_solids + stoveuseminutes_clean) == 0, NA, stoveuseminutes_clean),
    stoveuseminutes_solids = 
      ifelse(is.na(stoveuseminutes_clean), NA, stoveuseminutes_solids),
  ) # Creates NAs in case both clean and solid fuel timeuse is 0

# Extract clean fuel usage months (MUST BE CORRECTED FOR ELECTRIC COOKING)
fuelmths <- zambia_fuel %>%
  filter(grepl(H4, pattern = "B"),
         H2 %in% c(1,7,14,16)) %>% 
  transmute(HHID = HouseholdID,
         cleanfuelmths = naconvert(H11)) %>% 
  full_join(gen %>% select(HHID))

# Extract if electricity used for cooking
elcook <- zambia_stove %>% 
  mutate(HHID = HouseholdID) %>% 
  group_by(HHID) %>% 
  summarise(elcook = sum(I19A == 14)) %>% 
  mutate(elcook = ifelse(elcook > 0 & !is.na(elcook), 1,0)) %>% 
  full_join(gen %>% select(HHID))

# Correct clean fuel months for electric cooking
fuelmths <- left_join(elcook, fuelmths) %>% 
  mutate(cleanfuelmths = ifelse(elcook == 1, 12, natozero(cleanfuelmths)))

zambia <- left_join(gen, hh, by = c("HHID"))
zambia <- left_join(zambia, avail, by = c("HHID"))
zambia <- left_join(zambia, elcons, by = c("HHID"))
zambia <- left_join(zambia, elexp, by = c("HHID"))
zambia <- left_join(zambia, fuel, by = c("HHID"))
zambia <- left_join(zambia, fuelmths, by = c("HHID"))
zambia <- left_join(zambia, stove, by = c("HHID"))
zambia <- left_join(zambia, stove_af, by = c("HHID"))
zambia <- left_join(zambia, apps, by = c("HHID"))

zambia <- zambia %>% 
  mutate(Country = "Zambia"
  ) %>% 
  select(names(rwanda))

# EXPORT EXTRACTED DATASETS ----------------------------------------------------

allcountries <- rbind(rwanda,ethiopia) %>% 
  rbind(cambodia) %>% 
  rbind(myanmar) %>% 
  rbind(honduras) %>% 
  rbind(nepal) %>% 
  rbind(kenya) %>% 
  rbind(niger) %>% 
  rbind(stp) %>% 
  rbind(zambia)

readr::write_csv(allcountries, here::here("Data", "processed", "allcountries.csv"))

# FINAL CLEANING ---------------------------------------------------------------

# Read in data
allcountries <- readr::read_csv(here::here("Data", "processed", "allcountries.csv"),
                                col_types = cols(.default = col_double(), HHID = col_factor(),
                                                 Country = col_factor(), 
                                                 admin1 = col_factor(), admin2 = col_factor(), 
                                                 strata = col_factor(), psu = col_factor()))

# Correct electricity data - Correct NAs in grid, mg and solar use which correspond to no access 
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(across(c(grid, mg, solar, dg, batt), ~natozero(.)))

# Calculate SDG7 access indicators (assuming NAs indicate no access)
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(
    elec_sdg = as.numeric(natozero(grid) == 1 | natozero(mg) == 1 | natozero(solar) == 1 | natozero(dg) == 1 | natozero(batt) == 1),
    cook_sdg = as.numeric(natozero(primarystoveclean_stated) == 1))

# Correct electricity data - Set services to 0 when all NA (in-country data merging artifact)
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(across(c(srv_none, srv_minimum, srv_decent, srv_affluent), ~natozero(.)),
         srv_none = as.numeric(srv_minimum + srv_decent + srv_affluent == 0))

# Correct electricity data - Fix availability for cases where an NA was converted to 0 (data merging artifact)
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(availability = case_when(
    elec_sdg == 1 & availability == 0 ~ NA_real_,
    TRUE ~ availability),
    availability_eve = case_when(
      elec_sdg == 1 & availability_eve == 0 ~ NA_real_,
      TRUE ~ availability_eve))

# Correct electricity data - Modify electricity expenditures to NA if household has access but reports no expenditures
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(
    elexp_total = case_when(
      elec_sdg == 1 & elexp_total == 0 ~ NA_real_,
      TRUE ~ elexp_total)
  )

# Correct cooking data - Correct stove use minutes to NA for households that are using some form of clean cookstove but report 0 minutes usage (data merging artifact)
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(stoveuseminutes_clean = ifelse(primarystoveclean_stated == 1 & stoveuseminutes_clean == 0, NA, stoveuseminutes_clean))

# Correct cooking data - Correct clean fuel months to NA for households that are using some form of clean cookstove but report 0 months (data merging artifact)
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(cleanfuelmths = case_when(
    primarystoveclean_stated == 1 & cleanfuelmths == 0 ~ NA_real_, 
    TRUE ~ cleanfuelmths))

# Correct cooking data - Modify cooking expenditures to NA if household has clean stove but reports no expenditures
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(
    cookexp_total = case_when(
      primarystoveclean_stated == 1 & cookexp_total == 0 ~ NA_real_,
      TRUE ~ cookexp_total))

# Calculate expenditure quintiles and median expenditures
allcountries_cleaned <- allcountries_cleaned %>% 
  group_by(Country) %>% 
  mutate(
    expgrp_quint = as.numeric(
      cut(hh_exp_annual, breaks = 
            quantile(hh_exp_annual, probs = seq(0,1,.2), na.rm = T), 
          include.lowest = T, na.rm = T))
  ) %>% 
  group_by(Country, expgrp_quint) %>% 
  mutate(medexp_quint = median(hh_exp_annual, na.rm = T)
  ) %>% 
  ungroup() %>% 
  mutate(
    expgrp_quint = case_when(
      Country %in% c("Rwanda", "STP") ~ NA_real_,
      TRUE ~ expgrp_quint),
    medexp_quint = case_when(
      Country %in% c("Rwanda", "STP") ~ NA_real_,
      TRUE ~ medexp_quint)
  )

# Calculate AF affordability thresholds
allcountries_cleaned <- allcountries_cleaned %>%
  mutate(
    elcook = natozero(elcook), # Sets electric cooking to zero if NA
    elec_affordability_af = case_when(
      elcook == 0 ~ as.numeric(elexp_total <= medexp_quint*0.05), 
      elcook == 1 ~ as.numeric((elexp_total + cookexp_total) <= medexp_quint*0.10)),
    elec_affordability_af = ifelse(Country %in% c("Rwanda", "STP"), NA, elec_affordability_af),
    cook_affordability_af = case_when(
      elcook == 0 ~ as.numeric(cookexp_total <= medexp_quint*0.05),
      elcook == 1 ~ as.numeric((elexp_total + cookexp_total) <= medexp_quint*0.10)),
    cook_affordability_af = ifelse(Country %in% c("Rwanda", "STP"), NA, cook_affordability_af),
  )

# Calculate AF decent electricity access
allcountries_cleaned <- allcountries_cleaned %>% 
  rowwise() %>% 
  mutate(
    # first create AF dimensions as-is
    availability_af = as.numeric(availability >= 16),
    srv_af = as.numeric(sum(srv_decent, srv_affluent, na.rm = T) > 0),
    elec_affordability_af = elec_affordability_af,
    # now calculate af
    elec_af = as.numeric(availability_af == 1 & srv_af == 1 & elec_affordability_af == 1),
    elec_af = ifelse(is.na(availability_af) | is.na(srv_af) | is.na(elec_affordability_af), NA, elec_af)
  ) %>%
  ungroup()

# Calculate AF decent cooking access
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(
    # first create AF dimensions as-is
    primarystoveclean_af = as.numeric(stoveuseminutes_clean / (stoveuseminutes_clean +
                                                stoveuseminutes_solids) >= 0.8),
    cleanfuelmths_af = as.numeric(cleanfuelmths >= 10),
    cook_affordability_af = cook_affordability_af,
    cook_af = as.numeric(primarystoveclean_af == 1 & cleanfuelmths_af == 1 & cook_affordability_af == 1),
    cook_af = ifelse(is.na(primarystoveclean_af) | is.na(cleanfuelmths_af) | is.na(cook_affordability_af), NA, cook_af)
  ) %>% 
  ungroup()

# Set AF dimensions to never be better than the CF (e.g. fix case where supply is "affordable" because household has no access to electricity)
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(
    across(c(availability_af, srv_af, elec_affordability_af, elec_af), ~ifelse(elec_sdg == 0,NA,.)),
    across(c(primarystoveclean_af, cleanfuelmths_af, cook_affordability_af, cook_af), ~ifelse(cook_sdg == 0,NA,.))
    )

readr::write_csv(allcountries_cleaned, here::here("Data", "processed", "allcountries_cleaned.csv"))

# DESCRIPTIVE ANALYSIS ---------------------------------------------------------

allcountries_cleaned <- readr::read_csv(
  here::here("Data", "processed", "allcountries_cleaned.csv"),
  col_types = cols(.default = col_double(), HHID = col_factor(),
                   Country = col_factor(), 
                   admin1 = col_factor(), admin2 = col_factor(), 
                   strata = col_factor(), psu = col_factor()))

# Summmary statistics ----------------------------------------------------------

a <- allcountries_cleaned %>% 
  select(Country, elec_sdg, elec_af, availability_af, srv_af, elec_affordability_af) %>% 
  group_by(Country) %>% 
  summarise("Total Households" = n(), elec_sdg = sum(elec_sdg == 1), 
            across(c(elec_af, availability_af, srv_af, elec_affordability_af), 
                   ~sum(!is.na(.)))
  ) %>% 
  transmute(Country = Country, `Total Households`, `SDG7.1.1 Households` = elec_sdg,
         "Energy Services Completeness" = srv_af, 
         "Supply Availability Completeness" = availability_af,
         "Supply Affordability Completeness" = elec_affordability_af) %>% 
  mutate(across(c(`Energy Services Completeness`:`Supply Affordability Completeness`), 
                ~scales::percent(./`SDG7.1.1 Households`, accuracy = 1)))

names <- pull(a,1)
a_T <- as.data.frame(as.matrix(t(a[,-1])))
names(a_T) <- names

b <- allcountries_cleaned %>% 
  select(Country, cook_sdg, cook_af, cleanfuelmths_af, primarystoveclean_af, cook_affordability_af) %>% 
  group_by(Country) %>% 
  summarise(cook_sdg = sum(cook_sdg == 1), 
            across(c(cook_af, cleanfuelmths_af, primarystoveclean_af, cook_affordability_af), 
                   ~sum(!is.na(.)))
  ) %>% 
  transmute(Country = Country, `SDG7.1.2 Households` = cook_sdg,
            "Stove Time-use Completeness" = primarystoveclean_af, 
            "Fuel Availability Completeness" = cleanfuelmths_af,
            "Fuel Affordability Completeness" = cook_affordability_af)%>% 
  mutate(across(c(`Stove Time-use Completeness`:`Fuel Affordability Completeness`), 
                ~scales::percent(./`SDG7.1.2 Households`, accuracy = 1)))

names <- pull(b,1)
b_T <- as.data.frame(as.matrix(t(b[,-1])))
names(b_T) <- names

c <- rbind(a_T,b_T)

stargazer::stargazer(c, out = here("Manuscript", "Tables", "completecases_sdg.tex"),
                     summary = F, float = F)

# Comparing AF and SDG indicators at national level (electricity) --------------

# Create barplots comparing CF and AF, and visualise missing data for transparency
allcountry_elec_nataf <- allcountries_cleaned %>%
  mutate(
    across(c(availability_af, elec_affordability_af, srv_af),
           ~case_when(
             elec_sdg == 0 ~ "Unserved",
             . == 1 ~ "Decent",
             . == 0 ~ "Not Decent",
             TRUE ~ "Missing"), .names = "{.col}_plot"),
    elec_SDG = factor(case_when(
      elec_sdg == 1 ~ "Served",
      elec_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Missing", "Served")),
    elec_AF = factor(case_when(
      elec_af == 1 ~ "Decent",
      elec_af == 0 ~ "Not Decent",
      elec_sdg == 0 ~ "Unserved",
      TRUE ~ "Missing"), levels = c("Unserved", "Missing", "Not Decent", "Decent")),
    elec_SDGandAF = factor(case_when(
      elec_af == 1 & elec_sdg == 1 ~ "Served (CFandAF)",
      elec_sdg == 1 ~ "Served (CFonly)",
      elec_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Served (CFonly)", "Served (CFandAF)")),
    Group = case_when(
      elec_SDGandAF == "Served (CFandAF)" ~ "NA",
      elec_SDGandAF == "Unserved" ~ "NA",
      elec_affordability_af_plot %in% c("Decent") & 
        availability_af_plot %in% c("Decent") & 
        srv_af_plot == "Not Decent" ~ "Services",
      elec_affordability_af_plot %in% c("Decent") & 
        availability_af_plot == "Not Decent" & 
        srv_af_plot %in% c("Decent") ~ "Availability",
      elec_affordability_af_plot == "Not Decent" & 
        availability_af_plot %in% c("Decent") & 
        srv_af_plot %in% c("Decent") ~ "Affordability",
      elec_AF == "Missing" & elec_SDG == "Served" ~ "Missing",
      TRUE ~ "Several"),
    Group = factor(Group, levels = c("Affordability", "Availability", "Services", "Several", "Missing")),
    Region = ifelse(rural == 1, "Rural", "Urban")
  ) 

nat_elec_group <- allcountry_elec_nataf %>% 
  transmute(
    Country = Country, 
    "CF Aggregate" = elec_sdg,
    Affordability = elec_affordability_af,
    Availability = availability_af, 
    Services = srv_af,
    strata = strata,
    weight = weight) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country) %>% 
  summarise(`CF Aggregate` = survey_mean(`CF Aggregate`),
            across(c(Affordability, Availability, Services), ~survey_mean(., na.rm = T))) %>% 
  select(-matches("_se")) %>% 
  pivot_longer(cols = c(Affordability, Availability, Services)) %>% 
  mutate(Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                              "Ethiopia", "Nepal", "Kenya",
                                              "Niger", "STP", "Zambia"))
  )

elec_a <- nat_elec_group %>% 
  ggplot(aes(Country, value*`CF Aggregate`, fill = name, label = ifelse(value > 0.01,scales::percent(value, accuracy = 1), ''))) +
  geom_col(aes(Country, `CF Aggregate`/3), inherit.aes = F, alpha = 0.5) +
  geom_text(aes(Country, `CF Aggregate`, label = scales::percent(`CF Aggregate`, accuracy = 1)), 
                inherit.aes = F, nudge_x = 0.1, hjust  = -0.4, alpha = 0.2, vjust = 1) +
  geom_col(position = position_dodge2()) +
  geom_text(position = position_dodge2(width = 0.9), show.legend = F, size = 3) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
  scale_fill_manual(values = c("#0571b0","#fe9929","#88419d","#e31a1c"), 
                    breaks = c("Affordability", "Availability", "Services")) + 
  labs(y = "Share of households",
       subtitle = "SDG7.1.1 (Electricity) Status & AF Dimensions",
       fill = "AF Dimension") +
  theme_bw()

nat_elecalluvial <- allcountry_elec_nataf %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country, elec_SDG, elec_AF, Group, elec_affordability_af_plot, availability_af_plot, srv_af_plot) %>% 
  srvyr::survey_count() %>% 
  group_by(Country) %>% 
  mutate(prop = n / sum(n),
         Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                              "Ethiopia", "Nepal", "Kenya",
                                              "Niger", "STP", "Zambia")),
         across(matches("plot"), ~factor(., levels = c("Unserved", "Missing", "Not Decent", "Decent")))
  )

nat_elecalluvial %>% 
  ggplot(aes(axis1 = elec_SDG, axis2 = elec_AF, axis3 = elec_affordability_af_plot, axis4 = availability_af_plot, 
             axis5 = srv_af_plot,
             y = prop)) +
  scale_x_discrete(limits = c("SDG7.1.1", "AF", "Affordability",
                              "Availability", "Services"), expand = c(.2, .05)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_fill_manual(values = c( "#0571b0","#fe9929","#88419d","#e31a1c", "grey20"), 
                    breaks = c("Affordability", "Availability", "Services", "Several", "Missing")) + 
  geom_alluvium(aes(fill = Group)) + 
  geom_stratum() + 
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) + 
  facet_wrap(~Country, scales = "free_x", ncol = 2) +
  labs(fill = "Households considered 'served' by SDG7.1.1, nevertheless constrained by: ",
       y = "Share of households") +
  theme_bw() +
  theme(legend.position = "top")

ggsave(here("Manuscript","Figures","AFvsSDG_elec_sankey.png"),
       device = "png", width = 12, height = 12, units = "in")

# Comparing AF and SDG indicators at national level (cooking) ------------------

# Create barplots comparing CF and AF, and visualise missing data for transparency
allcountry_cook_nataf <- allcountries_cleaned %>%
  mutate(
    across(c(cleanfuelmths_af, cook_affordability_af, primarystoveclean_af),
           ~case_when(
             cook_sdg == 0 ~ "Unserved",
             . == 1 ~ "Decent",
             . == 0 ~ "Not Decent",
             TRUE ~ "Missing"), .names = "{.col}_plot"),
    cook_SDG = factor(case_when(
      cook_sdg == 1 ~ "Served",
      cook_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Missing", "Served")),
    cook_AF = factor(case_when(
      cook_af == 1 ~ "Decent",
      cook_af == 0 ~ "Not Decent",
      cook_sdg == 0 ~ "Unserved",
      TRUE ~ "Missing"), levels = c("Unserved", "Missing", "Not Decent", "Decent")),
    cook_SDGandAF = factor(case_when(
      cook_af == 1 & cook_sdg == 1 ~ "Served (CFandAF)",
      cook_sdg == 1 ~ "Served (CFonly)",
      cook_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Served (CFonly)", "Served (CFandAF)")),
    Group = case_when(
      cook_SDGandAF == "Served (CFandAF)" ~ "NA",
      cook_SDGandAF == "Unserved" ~ "NA",
      cook_affordability_af_plot %in% c("Decent") & 
        cleanfuelmths_af_plot %in% c("Decent") & 
        primarystoveclean_af_plot == "Not Decent" ~ "Time-use",
      cook_affordability_af_plot %in% c("Decent") & 
        cleanfuelmths_af_plot == "Not Decent" & 
        primarystoveclean_af_plot %in% c("Decent") ~ "Availability",
      cook_affordability_af_plot == "Not Decent" & 
        cleanfuelmths_af_plot %in% c("Decent") & 
        primarystoveclean_af_plot %in% c("Decent") ~ "Affordability",
      cook_AF == "Missing" & cook_SDG == "Served" ~ "Missing",
      TRUE ~ "Several"),
    Group = factor(Group, levels = c("Affordability", "Availability", "Time-use", "Several", "Missing")),
    Region = ifelse(rural == 1, "Rural", "Urban")
  )

nat_cook_group <- allcountry_cook_nataf %>% 
  transmute(
    Country = Country, 
    "CF Aggregate" = cook_sdg,
    Affordability = cook_affordability_af,
    Availability = cleanfuelmths_af, 
    "Time-use" = primarystoveclean_af,
    strata = strata,
    weight = weight) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country) %>% 
  summarise(`CF Aggregate` = survey_mean(`CF Aggregate`),
            across(c(Affordability, Availability, `Time-use`), ~survey_mean(., na.rm = T))) %>% 
  select(-matches("_se")) %>% 
  pivot_longer(cols = c(Affordability, Availability, `Time-use`)) %>% 
  mutate(Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                              "Ethiopia", "Nepal", "Kenya",
                                              "Niger", "STP", "Zambia"))
  )

cook_a <- nat_cook_group %>% 
  ggplot(aes(Country, value*`CF Aggregate`, fill = name, label = ifelse(`CF Aggregate` > 0.1,scales::percent(value, accuracy = 1), ''))) +
  geom_col(aes(Country, `CF Aggregate`/3), inherit.aes = F, alpha = 0.5) +
  geom_text(aes(Country, `CF Aggregate`, label = scales::percent(`CF Aggregate`, accuracy = 1)), 
            inherit.aes = F, nudge_x = 0.1, hjust  = -0.4, alpha = 0.2, vjust = 1) +
  geom_col(position = position_dodge2()) +
  geom_text(position = position_dodge2(width = 0.9), show.legend = F, size = 3) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
  scale_fill_manual(values = c("#0571b0","#fe9929","#88419d","#e31a1c"), 
                    breaks = c("Affordability", "Availability", "Time-use")) + 
  labs(y = "Share of households",
       subtitle = "SDG7.1.2 (Clean Cooking) Status & AF Dimensions",
       fill = "AF Dimension") +
  theme_bw()

wrap_plots(elec_a, cook_a, ncol = 1) & guides(fill = guide_legend(reverse = T))

ggsave(here("Manuscript","Figures","AFvsSDG_barchart.png"),
       device = "png", width = 7, height = 8, units = "in")

nat_cookalluvial <- allcountry_cook_nataf %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country, cook_SDG, cook_AF, Group, cook_affordability_af_plot, cleanfuelmths_af_plot, primarystoveclean_af_plot) %>% 
  srvyr::survey_count() %>% 
  group_by(Country) %>% 
  mutate(prop = n / sum(n),
         Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                              "Ethiopia", "Nepal", "Kenya",
                                              "Niger", "STP", "Zambia")),
         across(matches("plot"), ~factor(., levels = c("Unserved", "Missing", "Not Decent", "Decent")))
  )

nat_cookalluvial %>% 
  ggplot(aes(axis1 = cook_SDG, axis2 = cook_AF, axis3 = cook_affordability_af_plot, axis4 = cleanfuelmths_af_plot, 
             axis5 = primarystoveclean_af_plot,
             y = prop)) +
  scale_x_discrete(limits = c("SDG7.1.2", "AF", "Affordability",
                              "Availability", "Time-use"), expand = c(.2, .05)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_fill_manual(values = c( "#0571b0","#fe9929","#88419d","#e31a1c", "grey20"), 
                    breaks = c("Affordability", "Availability", "Time-use", "Several", "Missing")) + 
  geom_alluvium(aes(fill = Group)) + 
  geom_stratum() + 
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) + 
  facet_wrap(~Country, scales = "free_x", ncol = 2) +
  labs(fill = "Households considered 'served' by SDG7.1.2, nevertheless constrained by: ",
       y = "Share of households") +
  theme_bw() +
  theme(legend.position = "top")

ggsave(here("Manuscript","Figures","AFvsSDG_cook_sankey.png"),
       device = "png", width = 12, height = 12, units = "in")

# ECDF plots showing distributions for CFonly households (electricity) ---------

a <- allcountries_cleaned %>%
  filter(!is.na(elec_af), elec_sdg == 1) %>% 
  mutate(elexp_total = ifelse(elec_sdg == 0, NA, elexp_total),
         elexpshare = elexp_total / medexp_quint) %>% 
  ggplot(aes(elexpshare, colour = Country, label = Country, weight = weight)) +
  stat_ecdf_wtd(show.legend = F) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(0,0.2)) +
  labs(x = NULL, 
       y = "Cumulative probability (SDG7.1.1 households)",
       subtitle = "Relative electricity costs (less than ... %)") +
  theme_bw() +
  theme(legend.position = "top")

a <- a +
  geom_label_repel(aes(x = round(x,2), y = y, label = label,
                      colour = label), nudge_x = +0.1,
                  inherit.aes = F, min.segment.length = 0, show.legend = F,
             data = layer_data(a) %>% group_by(label) %>% 
               slice(which.min(abs(x - 0.05))) %>% 
               filter((1-y) > 0.1),
             segment.curvature = -0.1,
             segment.ncp = 3,
             segment.angle = 20,
             segment.linetype = 2,
             segment.square = T)

b <- allcountries_cleaned %>% 
  filter(!is.na(elec_af), elec_sdg == 1) %>% 
  select(Country, availability, weight) %>% 
  ggplot(aes(availability, colour = Country, label = Country, weight = weight)) +
  stat_ecdf_wtd(aes(y = 1 - ..y..), show.legend = F, geom = "line") +
  scale_x_continuous(breaks = seq(24,0,-4)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(24,0)) +
  labs(x = NULL, 
       y = "Cumulative probability (SDG7.1.1 households)",
       subtitle = "Electricity availability (at least ... hours)") +
  theme_bw() +
  theme(legend.position = "top")

b <- b +
  geom_label_repel(aes(x = round(x,2), y = y, label = label,
                       colour = label), box.padding = unit(0.1, "inches"), nudge_x = -10, nudge_y = 0.1,
                   inherit.aes = F, min.segment.length = 0, show.legend = F,
                   data = layer_data(b) %>% group_by(label) %>% 
                     slice(which.min(abs(x - 16))) %>% 
                     filter((y) > 0.1),
                   segment.curvature = -0.1,
                   segment.ncp = 3,
                   segment.angle = 20,
                   segment.linetype = 2,
                   segment.square = T)

c <- allcountries_cleaned %>% 
  filter(!is.na(elec_af), elec_sdg == 1) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country) %>% 
  summarise(
    Affluent = survey_mean(srv_affluent),
    Decent = survey_mean(srv_decent) + Affluent,
    Minimum = survey_mean(srv_minimum) + Decent,
    None = survey_mean(srv_none) + Minimum,
  ) %>% 
  select(-matches("_se")) %>% 
  pivot_longer(cols = None:Affluent) %>% 
  mutate(name = factor(name, levels = c("Affluent", "Decent", "Minimum", 
                                        "None")))

c <- c %>% 
  ggplot(aes(name, value, group = Country, colour = Country)) +
  geom_line(show.legend = F) +
  geom_point(show.legend = F) + 
  geom_label_repel(aes(label = Country),
                   box.padding = unit(0.1, "inches"),
                   data = c %>% filter(name == "Decent"), show.legend = F,
                   segment.curvature = -0.1,
                   segment.ncp = 2,
                   segment.angle = 20,
                   segment.linetype = 2,
                   segment.square = T) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
  labs(x = NULL, 
       y = "Cumulative probability (SDG7.1.1 households)",
       subtitle = "Access to energy services (at least ... services)") +
  theme_bw() +
  theme(legend.position = "bottom")

# ECDF plots showing distributions for CFonly households (cooking) ---------

d <- allcountries_cleaned %>%
  filter(!is.na(cook_af), cook_sdg == 1) %>% 
  mutate(cookexp_total = ifelse(cook_sdg == 0, NA, cookexp_total),
         cookexpshare = cookexp_total / medexp_quint) %>% 
  filter(cookexpshare > 0) %>% 
  ggplot(aes(cookexpshare, colour = Country, label = Country, weight = weight)) +
  stat_ecdf_wtd(show.legend = F) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(0,0.2)) +
  labs(x = NULL, 
       y = "Cumulative probability (SDG7.1.2 households)",
       subtitle = "Relative clean cookfuel costs (less than ... %)") +
  theme_bw() +
  theme(legend.position = "top")

d <- d +
  geom_label_repel(aes(x = round(x,2), y = y, label = label,
                       colour = label), nudge_x = +1,
                   inherit.aes = F, min.segment.length = 0, show.legend = F,
                   data = layer_data(d) %>% group_by(label) %>% 
                     slice(which.min(abs(x - 0.05))) %>% 
                     filter((1-y) > 0.1),
                   segment.curvature = -0.1,
                   segment.ncp = 3,
                   segment.angle = 20,
                   segment.linetype = 2,
                   segment.square = T)

e <- allcountries_cleaned %>% 
  filter(!is.na(cook_af), cook_sdg == 1) %>% 
  ggplot(aes(cleanfuelmths, colour = Country, label = Country, weight = weight)) +
  stat_ecdf_wtd(aes(y = 1 - ..y..), show.legend = F) +
  scale_x_continuous(breaks = seq(12,0,-2)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(12,0)) +
  labs(x = NULL, 
       y = "Cumulative probability (SDG7.1.2 households)",
       subtitle = "Clean cookfuel availability (at least ... months)") +
  theme_bw() +
  theme(legend.position = "top")

f <- allcountries_cleaned %>% 
  filter(!is.na(cook_af), cook_sdg == 1) %>% 
  ggplot(aes(stoveuseminutes_clean / (stoveuseminutes_clean + stoveuseminutes_solids), 
             colour = Country, label = Country, weights = weight)) +
  stat_ecdf_wtd(aes(y = 1 - ..y..), show.legend = F) +
  scale_x_continuous(breaks = seq(1,0,-0.2),
                     labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(1,0)) +
  labs(x = NULL, 
       y = "Cumulative probability (SDG7.1.2 households)",
       subtitle = "Relative time using clean stove (at least ... %)") +
  theme_bw() +
  theme(legend.position = "top")

f <- f +
  geom_label_repel(aes(x = round(x,2), y = y, label = label,
                       colour = label), box.padding = unit(0.1, "inches"), nudge_x = -1, nudge_y = 0.1,
                   inherit.aes = F, min.segment.length = 0, show.legend = F,
                   data = layer_data(f) %>% group_by(label) %>% 
                     filter(x <= 0.8) %>% 
                     slice(which.min(abs(x - 0.8))),
                   segment.curvature = -0.1,
                   segment.ncp = 3,
                   segment.angle = 20,
                   segment.linetype = 2,
                   segment.square = T)

wrap_plots(c,b,a,f,e,d, ncol = 3)

ggsave(here("Manuscript","Figures","AFvsSDG_ecdf.png"),
       device = "png", width = 12, height = 8, units = "in")

# Analysis of households considered served by both SDG7.1.1 and SDG7.1.2 -------

fullserved <- allcountries_cleaned %>% 
  mutate(served_both = as.numeric(elec_sdg == 1 & cook_sdg == 1))

a <- fullserved %>% 
  group_by(Country) %>% 
  filter(served_both == 1) %>% 
  mutate(expshare = (cookexp_total + elexp_total) / medexp_quint) %>% 
  ggplot(aes(expshare, colour = Country, label = Country, weight = weight)) +
  stat_ecdf_wtd(show.legend = F) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(0,0.2)) +
  labs(x = NULL, 
       y = "Cumulative probability (SDG7.1 (both) households)",
       subtitle = "Relative electricity and clean cookfuel costs (less than ... %)") +
  theme_bw() +
  theme(legend.position = "top")

a +
  geom_label_repel(aes(x = round(x,2), y = y, label = label,
                       colour = label), nudge_x = +0.2,
                   inherit.aes = F, min.segment.length = 0, show.legend = F,
                   data = layer_data(a) %>% group_by(label) %>% 
                     slice(which.min(abs(x - 0.1))),
                   segment.curvature = -0.1,
                   segment.ncp = 3,
                   segment.angle = 20,
                   segment.linetype = 2,
                   segment.square = T)

ggsave(here("Manuscript","Figures","AFvsSDG_ecdf_allserved.png"),
       device = "png", width = 6, height = 6, units = "in")

# Aggregates to compare with the MTF reports -----------------------------------

mtf_comparison_elec <- allcountries_cleaned %>% 
  mutate(
    Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                         "Ethiopia", "Nepal", "Kenya",
                                         "Niger", "STP", "Zambia")),
    Country = fct_rev(Country)
    ) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country) %>% 
  summarise(Grid = srvyr::survey_mean(grid),
         OG = survey_mean(grid == 0 & (mg == 1 | solar == 1 | batt == 1)),
         Availability = survey_mean(availability_af, na.rm = T),
         Affordability = survey_mean(elec_affordability_af, na.rm = T)
         ) %>% 
  select(-matches("_se")) %>% 
  mutate(across(c(Grid:Affordability), ~scales::percent(.,accuracy = 2))) 

report_data_elec <- tibble::tribble(
                   ~Country, ~Grid_Report, ~OG_Report, ~Availability_Report, ~Affordability_Report,
                   "Zambia",        "38%",       "5%",                "87%",                 "84%",
                      "STP",        "69%",       "2%",                "92%",                   "-",
                    "Niger",        "16%",       "4%",                "85%",                 "58%",
                    "Kenya",        "39%",      "26%",                "70%",                 "98%",
                    "Nepal",        "72%",      "23%",                "78%",                "100%",
                 "Ethiopia",        "33%",      "24%",                "51%",                "100%",
                 "Honduras",        "84%",       "4%",                "92%",                 "96%",
                  "Myanmar",        "39%",      "48%",                "51%",                "100%",
                 "Cambodia",        "72%",      "26%",                "75%",                 "91%",
                   "Rwanda",        "24%",       "5%",                "77%",                   "-"
                 )

mtf_comparison_elec <- left_join(mtf_comparison_elec, report_data_elec) %>% 
  select(Country,
         matches("Grid"), matches("OG"), matches("Availability"), 
         matches("Affordability"))

mtf_comparison_cook <- allcountries_cleaned %>% 
  mutate(
    Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                         "Ethiopia", "Nepal", "Kenya",
                                         "Niger", "STP", "Zambia")),
    Country = fct_rev(Country),
  ) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country) %>% 
  summarise(BLEN = srvyr::survey_mean(cook_sdg),
            Availability = survey_mean(cleanfuelmths_af, na.rm = T),
            Affordability = survey_mean(cook_affordability_af, na.rm = T)
  ) %>% 
  select(-matches("_se")) %>% 
  mutate(across(c(BLEN:Affordability), ~scales::percent(.,accuracy = 2))) 

report_data_cook <- tibble::tribble(
                        ~Country, ~BLEN_Report, ~Availability_Report, ~Affordability_Report,
                        "Zambia",        "17%",                "97%",                 "75%",
                           "STP",         "1%",                  "-",                   "-",
                         "Niger",         "5%",                "93%",                 "93%",
                         "Kenya",        "16%",                  "-",                 "79%",
                         "Nepal",        "28%",                "99%",                 "86%",
                      "Ethiopia",         "4%",                "96%",                 "72%",
                      "Honduras",        "36%",                "99%",                 "85%",
                       "Myanmar",        "24%",                  "-",                 "81%",
                      "Cambodia",        "33%",                "84%",                 "94%",
                        "Rwanda",         "0%",                  "-",                   "-"
                      )


mtf_comparison_cook <- left_join(mtf_comparison_cook, report_data_cook) %>% 
  select(Country, BLEN, BLEN_Report, matches("Availability"),
          matches("Affordability"))

stargazer::stargazer(mtf_comparison_elec, 
                     out = here("Manuscript", "Tables", "mtfcompare_elec.tex"),
                     summary = F, rownames = F, float = F)

stargazer::stargazer(mtf_comparison_cook, 
                     out = here("Manuscript", "Tables", "mtfcompare_cook.tex"),
                     summary = F, rownames = F, float = F)

# Regression analysis ----------------------------------------------------------

# Subset dataframe (note: Rwanda and STP removed as no expenditure data available)
lpm_df_elec <- allcountry_elec_nataf %>% 
  mutate(across(c(rural, expgrp_quint), ~factor(.)),
         PSU = paste0(Country,psu),
         weight = as.numeric(weight)) %>% 
  filter(elec_SDG != "Unserved")

lpm_df_cook <- allcountry_cook_nataf %>%
  mutate(across(c(rural, expgrp_quint), ~factor(.)),
         PSU = paste0(Country,psu),
         weight = as.numeric(weight)) %>% 
  filter(cook_SDG != "Unserved")

a <- fixest::feols(
  fml = c(srv_af, availability_af, elec_affordability_af) ~ 
    expgrp_quint | Country^rural^admin2,
  data = lpm_df_elec,
  weights = ~weight,
  cluster = ~PSU)

b <- fixest::feols(
  fml = c(primarystoveclean_af, cleanfuelmths_af, cook_affordability_af) ~ 
    expgrp_quint | Country^rural^admin2,
  data = lpm_df_cook,
  weights = ~weight,
  cluster = ~PSU)

etable(a, digits = 3, tex = T,
       dict = c(rural = "Rural", admin2 = "AdminLevel2", 
                expgrp_quint2 = "ExpenditureQuintile2",
                expgrp_quint3 = "ExpenditureQuintile3",
                expgrp_quint4 = "ExpenditureQuintile4",
                expgrp_quint5 = "ExpenditureQuintile5",
                availability_af = "Decent Availability",
                srv_af = "Decent Services",
                elec_affordability_af = "Decent Affordability"), 
       file = here("Manuscript", "Tables", "lpm_elec_allfe.tex"))

etable(b, digits = 3, tex = T, 
       dict = c(rural = "Rural", admin2 = "AdminLevel2", 
                expgrp_quint2 = "ExpenditureQuintile2",
                expgrp_quint3 = "ExpenditureQuintile3",
                expgrp_quint4 = "ExpenditureQuintile4",
                expgrp_quint5 = "ExpenditureQuintile5",
                cleanfuelmths_af = "Decent Availability",
                primarystoveclean_af = "Decent Time-use",
                cook_affordability_af = "Decent Affordability"), 
       file = here("Manuscript", "Tables", "lpm_cook_allfe.tex"))

# Sensitivity analysis ---------------------------------------------------------
# Re-analysis setting electricity services to tier 3 (affluent) in the AF

allcountries_cleaned <- readr::read_csv(
  here::here("Data", "processed", "allcountries_cleaned.csv"),
  col_types = cols(.default = col_double(), HHID = col_factor(),
                   Country = col_factor(), 
                   admin1 = col_factor(), admin2 = col_factor(), 
                   strata = col_factor(), psu = col_factor()))

# Calculate AF decent electricity access
allcountries_cleaned <- allcountries_cleaned %>% 
  rowwise() %>% 
  mutate(
    # first create AF dimensions as-is
    srv_af = as.numeric(sum(srv_affluent, na.rm = T) > 0),
    # now calculate af
    elec_af = as.numeric(availability_af == 1 & srv_af == 1 & elec_affordability_af == 1),
    elec_af = ifelse(is.na(availability_af) | is.na(srv_af) | is.na(elec_affordability_af), NA, elec_af)
  ) %>%
  ungroup()

# Set AF dimensions to never be better than the CF (e.g. fix case where supply is "affordable" because household has no access to electricity)
allcountries_cleaned <- allcountries_cleaned %>% 
  mutate(
    across(c(srv_af), ~ifelse(elec_sdg == 0,NA,.))
  )

# Create barplots comparing CF and AF, and visualise missing data for transparency
allcountry_elec_nataf <- allcountries_cleaned %>%
  mutate(
    across(c(availability_af, elec_affordability_af, srv_af),
           ~case_when(
             elec_sdg == 0 ~ "Unserved",
             . == 1 ~ "Decent",
             . == 0 ~ "Not Decent",
             TRUE ~ "Missing"), .names = "{.col}_plot"),
    elec_SDG = factor(case_when(
      elec_sdg == 1 ~ "Served",
      elec_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Missing", "Served")),
    elec_AF = factor(case_when(
      elec_af == 1 ~ "Decent",
      elec_af == 0 ~ "Not Decent",
      elec_sdg == 0 ~ "Unserved",
      TRUE ~ "Missing"), levels = c("Unserved", "Missing", "Not Decent", "Decent")),
    elec_SDGandAF = factor(case_when(
      elec_af == 1 & elec_sdg == 1 ~ "Served (CFandAF)",
      elec_sdg == 1 ~ "Served (CFonly)",
      elec_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Served (CFonly)", "Served (CFandAF)")),
    Group = case_when(
      elec_SDGandAF == "Served (CFandAF)" ~ "NA",
      elec_SDGandAF == "Unserved" ~ "NA",
      elec_affordability_af_plot %in% c("Decent") & 
        availability_af_plot %in% c("Decent") & 
        srv_af_plot == "Not Decent" ~ "Services",
      elec_affordability_af_plot %in% c("Decent") & 
        availability_af_plot == "Not Decent" & 
        srv_af_plot %in% c("Decent") ~ "Availability",
      elec_affordability_af_plot == "Not Decent" & 
        availability_af_plot %in% c("Decent") & 
        srv_af_plot %in% c("Decent") ~ "Affordability",
      elec_AF == "Missing" & elec_SDG == "Served" ~ "Missing",
      TRUE ~ "Several"),
    Group = factor(Group, levels = c("Affordability", "Availability", "Services", "Several", "Missing")),
    Region = ifelse(rural == 1, "Rural", "Urban")
  ) 

nat_elec_group <- allcountry_elec_nataf %>% 
  transmute(
    Country = Country, 
    "CF Aggregate" = elec_sdg,
    Affordability = elec_affordability_af,
    Availability = availability_af, 
    Services = srv_af,
    strata = strata,
    weight = weight) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country) %>% 
  summarise(`CF Aggregate` = survey_mean(`CF Aggregate`),
            across(c(Affordability, Availability, Services), ~survey_mean(., na.rm = T))) %>% 
  select(-matches("_se")) %>% 
  pivot_longer(cols = c(Affordability, Availability, Services)) %>% 
  mutate(Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                              "Ethiopia", "Nepal", "Kenya",
                                              "Niger", "STP", "Zambia"))
  )

nat_elec_group %>% 
  ggplot(aes(Country, value*`CF Aggregate`, fill = name, label = ifelse(value > 0.01,scales::percent(value, accuracy = 1), ''))) +
  geom_col(aes(Country, `CF Aggregate`/3), inherit.aes = F, alpha = 0.5) +
  geom_text(aes(Country, `CF Aggregate`, label = scales::percent(`CF Aggregate`, accuracy = 1)), 
            inherit.aes = F, nudge_x = 0.1, hjust  = -0.4, alpha = 0.2, vjust = 1) +
  geom_col(position = position_dodge2()) +
  geom_text(position = position_dodge2(width = 0.9), show.legend = F, size = 3) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
  scale_fill_manual(values = c("#0571b0","#fe9929","#88419d","#e31a1c"), 
                    breaks = c("Affordability", "Availability", "Services")) + 
  labs(y = "Share of households",
       subtitle = "SDG7.1.1 (Electricity) Status & AF Dimensions",
       fill = "AF Dimension") +
  theme_bw()

ggsave(here("Manuscript","Figures","AFvsSDG_barchart_elec_sens.png"),
       device = "png", width = 7, height = 4, units = "in")

# Further analysis -------------------------------------------------------------

allcountries_cleaned <- readr::read_csv(
  here::here("Data", "processed", "allcountries_cleaned.csv"),
  col_types = cols(.default = col_double(), HHID = col_factor(),
                   Country = col_factor(), 
                   admin1 = col_factor(), admin2 = col_factor(), 
                   strata = col_factor(), psu = col_factor()))

# Create barplots comparing CF and AF, and visualise missing data for transparency
allcountry_elec_nataf <- allcountries_cleaned %>%
  mutate(
    across(c(availability_af, elec_affordability_af, srv_af),
           ~case_when(
             elec_sdg == 0 ~ "Unserved",
             . == 1 ~ "Decent",
             . == 0 ~ "Not Decent",
             TRUE ~ "Missing"), .names = "{.col}_plot"),
    elec_SDG = factor(case_when(
      elec_sdg == 1 ~ "Served",
      elec_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Missing", "Served")),
    elec_AF = factor(case_when(
      elec_af == 1 ~ "Decent",
      elec_af == 0 ~ "Not Decent",
      elec_sdg == 0 ~ "Unserved",
      TRUE ~ "Missing"), levels = c("Unserved", "Missing", "Not Decent", "Decent")),
    elec_SDGandAF = factor(case_when(
      elec_af == 1 & elec_sdg == 1 ~ "Served (CFandAF)",
      elec_sdg == 1 ~ "Served (CFonly)",
      elec_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Served (CFonly)", "Served (CFandAF)")),
    Group = case_when(
      elec_SDGandAF == "Served (CFandAF)" ~ "NA",
      elec_SDGandAF == "Unserved" ~ "NA",
      elec_affordability_af_plot %in% c("Decent") & 
        availability_af_plot %in% c("Decent") & 
        srv_af_plot == "Not Decent" ~ "Services",
      elec_affordability_af_plot %in% c("Decent") & 
        availability_af_plot == "Not Decent" & 
        srv_af_plot %in% c("Decent") ~ "Availability",
      elec_affordability_af_plot == "Not Decent" & 
        availability_af_plot %in% c("Decent") & 
        srv_af_plot %in% c("Decent") ~ "Affordability",
      elec_AF == "Missing" & elec_SDG == "Served" ~ "Missing",
      TRUE ~ "Several"),
    Group = factor(Group, levels = c("Affordability", "Availability", "Services", "Several", "Missing")),
    Region = ifelse(rural == 1, "Rural", "Urban")
  ) 

nat_elec_group <- allcountry_elec_nataf %>% 
  filter(!is.na(expgrp_quint)) %>% 
  transmute(
    ExpQuint = factor(expgrp_quint),
    Country = Country, 
    Access = elec_sdg,
    Affordability = elec_affordability_af,
    Availability = availability_af, 
    Services = srv_af,
    strata = strata,
    weight = weight) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country, ExpQuint) %>% 
  summarise(Access = survey_mean(Access),
            across(c(Affordability, Availability, Services), ~survey_mean(., na.rm = T))) %>% 
  select(-matches("_se")) %>% 
  mutate(across(c(Affordability, Availability, Services), ~.*Access)) %>% 
  pivot_longer(cols = c(Access, Affordability, Availability, Services)) %>% 
  mutate(Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                              "Ethiopia", "Nepal", "Kenya",
                                              "Niger", "STP", "Zambia")),
         name = factor(name, levels = c("Access", "Services", "Availability", "Affordability"))
  )

allcountry_cook_nataf <- allcountries_cleaned %>%
  mutate(
    across(c(cleanfuelmths_af, cook_affordability_af, primarystoveclean_af),
           ~case_when(
             cook_sdg == 0 ~ "Unserved",
             . == 1 ~ "Decent",
             . == 0 ~ "Not Decent",
             TRUE ~ "Missing"), .names = "{.col}_plot"),
    cook_SDG = factor(case_when(
      cook_sdg == 1 ~ "Served",
      cook_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Missing", "Served")),
    cook_AF = factor(case_when(
      cook_af == 1 ~ "Decent",
      cook_af == 0 ~ "Not Decent",
      cook_sdg == 0 ~ "Unserved",
      TRUE ~ "Missing"), levels = c("Unserved", "Missing", "Not Decent", "Decent")),
    cook_SDGandAF = factor(case_when(
      cook_af == 1 & cook_sdg == 1 ~ "Served (CFandAF)",
      cook_sdg == 1 ~ "Served (CFonly)",
      cook_sdg == 0 ~ "Unserved"), levels = c("Unserved", "Served (CFonly)", "Served (CFandAF)")),
    Group = case_when(
      cook_SDGandAF == "Served (CFandAF)" ~ "NA",
      cook_SDGandAF == "Unserved" ~ "NA",
      cook_affordability_af_plot %in% c("Decent") & 
        cleanfuelmths_af_plot %in% c("Decent") & 
        primarystoveclean_af_plot == "Not Decent" ~ "Time-use",
      cook_affordability_af_plot %in% c("Decent") & 
        cleanfuelmths_af_plot == "Not Decent" & 
        primarystoveclean_af_plot %in% c("Decent") ~ "Availability",
      cook_affordability_af_plot == "Not Decent" & 
        cleanfuelmths_af_plot %in% c("Decent") & 
        primarystoveclean_af_plot %in% c("Decent") ~ "Affordability",
      cook_AF == "Missing" & cook_SDG == "Served" ~ "Missing",
      TRUE ~ "Several"),
    Group = factor(Group, levels = c("Affordability", "Availability", "Time-use", "Several", "Missing")),
    Region = ifelse(rural == 1, "Rural", "Urban")
  )

nat_cook_group <- allcountry_cook_nataf %>% 
  filter(!is.na(expgrp_quint)) %>% 
  transmute(
    ExpQuint = factor(expgrp_quint),
    Country = Country, 
    Access = cook_sdg,
    Affordability = cook_affordability_af,
    Availability = cleanfuelmths_af, 
    "Time-use" = primarystoveclean_af,
    strata = strata,
    weight = weight) %>% 
  srvyr::as_survey(., strata = strata, weights = weight) %>% 
  group_by(Country, ExpQuint) %>% 
  summarise(Access = survey_mean(Access),
            across(c(Affordability, Availability, `Time-use`), ~survey_mean(., na.rm = T))) %>% 
  select(-matches("_se")) %>% 
  mutate(across(c(Affordability, Availability, `Time-use`), ~.*Access)) %>% 
  pivot_longer(cols = c(Access, Affordability, Availability, `Time-use`)) %>% 
  mutate(Country = factor(Country, levels = c("Rwanda", "Cambodia", "Myanmar", "Honduras", 
                                              "Ethiopia", "Nepal", "Kenya",
                                              "Niger", "STP", "Zambia")),
         name = factor(name, levels = c("Access", "Time-use", "Availability", "Affordability"))
  )

nat_elec_group %>% 
  ggplot(aes(ExpQuint, value, colour = name, group = name, shape = name,
             label = ifelse(value > 0.01,scales::percent(value, accuracy = 1), ''))) +
  geom_line() +
  geom_point() +
  facet_wrap(~Country, scales = "free_x") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
  labs(y = "Share of households in expenditure quintile",
       subtitle = "SDG7.1.1 (Electricity) Status & AF Dimensions by Expenditure Quintile",
       colour = NULL,
       shape = NULL) +
  theme_bw() +
  theme(legend.position = "bottom")

ggsave(here("Manuscript","Figures","AFvsSDG_quintiles_elec.png"),
       device = "png", width = 7, height = 7, units = "in")

nat_cook_group %>% 
  ggplot(aes(ExpQuint, value, colour = name, group = name, shape = name,
             label = ifelse(value > 0.01,scales::percent(value, accuracy = 1), ''))) +
  geom_line() +
  geom_point() +
  facet_wrap(~Country, scales = "free_x") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) +
  labs(y = "Share of households in expenditure quintile",
       subtitle = "SDG7.1.2 (Clean Cooking) Status & AF Dimensions by Expenditure Quintile",
       colour = NULL,
       shape = NULL) +
  theme_bw() +
  theme(legend.position = "bottom")

ggsave(here("Manuscript","Figures","AFvsSDG_quintiles_cook.png"),
       device = "png", width = 7, height = 7, units = "in")
